Example #1
0
def check_for_new_cas(tab_manager=None):
    """Write the list of CAS numbers to run through SciFinder.

    The candidate values come from
    ``cat_rec.Categorize_CAS(tab_manager=tab_manager).get_corrected_not_perf()``.
    Each one is checked with ``ct.is_valid_CAS_code`` and only the rows that
    pass are written to ``tmpdir + 'cas_to_check.csv'`` (without the index).
    The boolean ``keep`` column is written alongside ``not_perf``.

    Args:
        tab_manager: Forwarded unchanged to ``cat_rec.Categorize_CAS``.
    """
    categorizer = cat_rec.Categorize_CAS(tab_manager=tab_manager)
    candidates = pd.DataFrame(
        {'not_perf': categorizer.get_corrected_not_perf()})

    # Flag every candidate with the result of the validity check.
    candidates['keep'] = candidates['not_perf'].map(ct.is_valid_CAS_code)

    valid_rows = candidates[candidates['keep']]
    valid_rows.to_csv(tmpdir + 'cas_to_check.csv', index=False)
Example #2
0
 def _clean_CAS_for_comparison(self):
     """Add cleaned CAS columns to ``self.cas_field_cat``.

     Two columns are created (or overwritten) in place:

     * ``cas_clean`` -- ``CASNumber`` with every character that is not a
       digit or a hyphen removed.
     * ``zero_corrected`` -- ``cas_clean`` passed element-wise through
       ``ct.correct_zeros``.
     """
     # regex=True must be explicit: pandas 2.0 changed the default of
     # Series.str.replace to regex=False, which would treat the character
     # class as a literal string and strip nothing.
     self.cas_field_cat['cas_clean'] = (
         self.cas_field_cat.CASNumber.str.replace(
             r'[^0-9-]', '', regex=True))
     self.cas_field_cat['zero_corrected'] = (
         self.cas_field_cat.cas_clean.map(ct.correct_zeros))
Example #3
0
 def examine_not_valid(self):
     """Record entries of ``self.reviewed`` whose CAS code is not valid.

     For every key of ``self.reviewed`` whose stored value equals ``False``
     and whose first element is rejected by ``ct.is_valid_CAS_code``, the
     key is printed together with a running count (starting at 1) and
     ``self._add_to_results_dic`` is called with ``key[0]``, ``key[1]``,
     the label ``'non_valid_cas'`` and the value ``8``.

     NOTE: flagged by the original author as not ready -- DONT USE THIS YET.
     """
     hit_number = 1
     for key in self.reviewed:
         # Equality with False is kept as-is; ``not value`` would also
         # match None or empty values and change which entries are examined.
         flagged_false = self.reviewed[key] == False  # noqa: E712
         if flagged_false and not ct.is_valid_CAS_code(key[0]):
             print('\n\n', key, hit_number, '\n\n')
             hit_number += 1
             self._add_to_results_dic(
                 key[0], key[1], 'non_valid_cas', 8)
    def _get_field_dic(self):
        """Build ``self.casig_orig`` from the precompiled field dictionary.

        Loads the pickle at ``field_dic_fn``, takes its ``'casig'`` entry as
        ``self.casig_orig`` and adds three derived columns:

        * ``cas_clean`` -- ``CASNumber`` with every character that is not a
          digit or a hyphen removed.
        * ``zero_corrected`` -- ``cas_clean`` passed element-wise through
          ``ct.correct_zeros``.
        * ``ig_clean`` -- ``IngredientName`` stripped of surrounding
          whitespace and lower-cased.

        Duplicate rows are then dropped in place and the result is written
        to ``outdir + 'temp.csv'`` with every field quoted using ``$``.
        """
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # field_dic_fn must only ever point at a trusted, locally built file.
        with open(field_dic_fn, 'rb') as f:
            field_dic = pickle.load(f)
        self.casig_orig = field_dic['casig']

        # regex=True must be explicit: pandas 2.0 changed the default of
        # Series.str.replace to regex=False, which would treat the character
        # class as a literal string and strip nothing.
        self.casig_orig['cas_clean'] = self.casig_orig.CASNumber.str.replace(
            r'[^0-9-]', '', regex=True)
        self.casig_orig['zero_corrected'] = self.casig_orig.cas_clean.map(
            ct.correct_zeros)

        # now the Ingredients
        self.casig_orig['ig_clean'] = (
            self.casig_orig.IngredientName.str.strip().str.lower())
        self.casig_orig.drop_duplicates(inplace=True)

        self.casig_orig.to_csv(outdir + 'temp.csv',
                               quotechar='$',
                               quoting=csv.QUOTE_ALL)
Example #5
0
    def _clean_CAS_for_comparison(self):
        """Add cleaned CAS columns to ``self.cas_field_cat``.

        Two columns are created (or overwritten) in place:

        * ``cas_clean`` -- ``CASNumber`` with every character that is not a
          digit or a hyphen removed.
        * ``zero_corrected`` -- ``cas_clean`` passed element-wise through
          ``ct.correct_zeros`` and then through ``self._replace_obsolete``.
        """
        # regex=True must be explicit: pandas 2.0 changed the default of
        # Series.str.replace to regex=False, which would treat the character
        # class as a literal string and strip nothing.
        self.cas_field_cat['cas_clean'] = (
            self.cas_field_cat.CASNumber.str.replace(
                r'[^0-9-]', '', regex=True))
        self.cas_field_cat['zero_corrected'] = (
            self.cas_field_cat.cas_clean.map(ct.correct_zeros))
        # replace the handful of obsolete cas numbers with widely used number
        self.cas_field_cat['zero_corrected'] = (
            self.cas_field_cat.zero_corrected.map(self._replace_obsolete))