def check_for_new_cas(tab_manager=None):
    """Write a CSV of candidate CAS numbers to run through SciFinder.

    Pulls the corrected "not perfect" CAS values from the categorizer,
    keeps only the syntactically valid codes, and saves them to
    tmpdir + 'cas_to_check.csv' for manual review.
    """
    candidates = cat_rec.Categorize_CAS(
        tab_manager=tab_manager).get_corrected_not_perf()
    n_p = pd.DataFrame({'not_perf': candidates})
    # flag rows whose CAS string passes the checksum/format validation
    n_p['keep'] = n_p.not_perf.map(ct.is_valid_CAS_code)
    n_p[n_p.keep].to_csv(tmpdir + 'cas_to_check.csv', index=False)
def _clean_CAS_for_comparison(self):
    """Add normalized CAS columns to self.cas_field_cat for matching.

    Creates two columns:
      cas_clean      -- CASNumber with everything except digits and
                        dashes stripped out
      zero_corrected -- cas_clean with leading-zero errors fixed via
                        ct.correct_zeros
    """
    # regex=True is required: since pandas 1.4, Series.str.replace
    # defaults to regex=False, which would treat '[^0-9-]' as a literal
    # substring and strip nothing.
    self.cas_field_cat['cas_clean'] = self.cas_field_cat.CASNumber.str.replace(
        r'[^0-9-]', '', regex=True)
    self.cas_field_cat['zero_corrected'] = self.cas_field_cat.cas_clean.map(
        lambda x: ct.correct_zeros(x))
def examine_not_valid(self):
    """Record reviewed tuples whose CAS code fails validation.

    !!!! DONT USE THIS YET

    Walks self.reviewed (tuple -> bool flag); for each tuple explicitly
    flagged False whose CAS string (tup[0]) is not a valid CAS code,
    prints it and files it in the results dict under 'non_valid_cas'.
    """
    cntr = 1
    for tup in self.reviewed:
        # 'is False' (not '== False') per PEP 8: we want the exact
        # boolean flag, and it avoids matching 0 or other falsy values.
        if self.reviewed[tup] is False:
            if not ct.is_valid_CAS_code(tup[0]):
                print('\n\n', tup, cntr, '\n\n')
                cntr += 1
                self._add_to_results_dic(tup[0], tup[1], 'non_valid_cas', 8)
def _get_field_dic(self):
    """Takes the precompiled field_dic and creates casig_orig df from it.

    Loads the pickled field_dic, keeps its 'casig' frame, adds
    normalized CAS columns (cas_clean, zero_corrected) and a
    case/whitespace-insensitive ingredient key (ig_clean),
    de-duplicates, and writes a debug CSV to outdir + 'temp.csv'.
    """
    with open(field_dic_fn, 'rb') as f:
        field_dic = pickle.load(f)
    self.casig_orig = field_dic['casig']
    # regex=True is required: since pandas 1.4, Series.str.replace
    # defaults to regex=False, which would treat '[^0-9-]' as a literal
    # substring and strip nothing.
    self.casig_orig['cas_clean'] = self.casig_orig.CASNumber.str.replace(
        r'[^0-9-]', '', regex=True)
    self.casig_orig['zero_corrected'] = self.casig_orig.cas_clean.map(
        lambda x: ct.correct_zeros(x))
    # now the Ingredients: lowercase/stripped for comparison
    self.casig_orig['ig_clean'] = self.casig_orig.IngredientName.str.strip(
        ).str.lower()
    self.casig_orig.drop_duplicates(inplace=True)
    # '$' quotechar keeps embedded commas/quotes in chemical names intact
    self.casig_orig.to_csv(outdir + 'temp.csv', quotechar='$',
                           quoting=csv.QUOTE_ALL)
def _clean_CAS_for_comparison(self):
    """Add normalized CAS columns to self.cas_field_cat for matching.

    Creates cas_clean (digits and dashes only) and zero_corrected
    (leading zeros fixed via ct.correct_zeros), then swaps a handful
    of obsolete CAS numbers for their widely used replacements.
    """
    # regex=True is required: since pandas 1.4, Series.str.replace
    # defaults to regex=False, which would treat '[^0-9-]' as a literal
    # substring and strip nothing.
    self.cas_field_cat['cas_clean'] = self.cas_field_cat.CASNumber.str.replace(
        r'[^0-9-]', '', regex=True)
    self.cas_field_cat['zero_corrected'] = self.cas_field_cat.cas_clean.map(
        lambda x: ct.correct_zeros(x))
    # replace the handful of obsolete cas numbers with widely used number
    self.cas_field_cat.zero_corrected = self.cas_field_cat.zero_corrected.map(
        lambda x: self._replace_obsolete(x))