def main():
    """CLI entry point: validate arguments and run the plugin normalizer.

    Usage: python check.py <plugin_dir>

    Exits with status 1 on missing or invalid arguments.
    """
    if len(sys.argv) < 2:
        # Python 3 print function (original used the Python 2 print statement).
        print("Usage: python check.py <plugin_dir>")
        sys.exit(1)  # sys.exit raises SystemExit; no return needed
    plugin_dir = sys.argv[1]
    if not os.path.isdir(plugin_dir):
        print("Invalid plugin dir: " + plugin_dir)
        sys.exit(1)
    # test_only=True: check mode, per the script's name — TODO confirm against
    # the normalize.Normalizer implementation.
    normalize.Normalizer(plugin_dir, test_only=True).normalize()
def standardizeMolVS(inMol):
    """Run a molecule through the MolVS standardization pipeline.

    Applies, in order: largest-fragment selection, uncharging, general
    standardization, normalization, and tautomer canonicalization.
    Returns the fully standardized molecule.
    """
    pipeline = (
        fragment.LargestFragmentChooser().choose,
        charge.Uncharger().uncharge,
        Standardizer().standardize,
        normalize.Normalizer().normalize,
        tautomer.TautomerCanonicalizer().canonicalize,
    )
    mol = inMol
    for step in pipeline:
        mol = step(mol)
    return mol
# K-fold cross-validation setup for a small neural-network trainer.
# NOTE(review): this fragment appears truncated — the per-epoch training
# loop that would use epoch/cost/moving_cost/cost_list is not visible in
# this chunk; do not assume its behavior from here.
kf = KFold(n_splits=8)
learning_rate = 1e-2
batch_size = 30
max_epoch = 1000
acceptable_loss = 1e-2  # presumably an early-stopping threshold — TODO confirm
score_list = []
highest_score = 0
model = None
for train_index, test_index in kf.split(input_matrix):
    # Split rows of the feature matrix and labels by fold indices.
    input_train, class_train = input_matrix[train_index], class_label[
        train_index]
    input_test, class_test = input_matrix[test_index], class_label[
        test_index]
    # Fit the normalizer on the training fold only, then apply the same
    # transform to the held-out fold — avoids test-set leakage.
    # NOTE(review): nm.Normalizer's scale_to_range kwarg is not sklearn's
    # Normalizer API; assumed to be a project-local class — verify.
    train_normalizer = nm.Normalizer()
    train_normalizer.fit(input_train)
    input_train = train_normalizer.transform(input_train, scale_to_range=(-1.5, 1.5))
    input_test = train_normalizer.transform(input_test, scale_to_range=(-1.5, 1.5))
    nn_layers = create_nn()
    # Per-fold training state; the loop that updates these is cut off below.
    cost = np.inf
    moving_cost = np.inf
    epoch = 0
    cost_list = []
def create_router(config):
    """Build per-language pipeline routing tables from a config object.

    Every config section that is not 'service', 'external' or 'codes' is
    treated as a language section. For each language, the enabled pipeline
    stages (tokenizer, preprocessor, ngrams, normalizer, sentiment, ner,
    pos) are resolved to callables.

    Args:
        config: a configparser-style object (sections / get / getint /
            getboolean / has_option / items).

    Returns:
        (router, outputs): router maps lang -> {stage_name: callable};
        outputs maps lang -> set of stage names whose results should be
        emitted.

    Raises:
        KeyError: on an unknown tokenizer, preprocessor, normalizer or
            tagger type named in the config.
    """
    router = {}
    outputs = {}
    sections = config.sections()
    langs = [x for x in sections if x not in ['service', 'external', 'codes']]
    langs = sorted(set(langs))  # dedupe; sorted() already returns a list
    # 'codes' section maps language section names to language codes.
    langmap = {k: v for k, v in config.items('codes')}
    logging.info('languages in configuration: {}'.format(str(langs)))
    stanford_ner = config.get('external', 'stanford_ner')
    stanford_ner = os.path.abspath(stanford_ner)
    stanford_pos = config.get('external', 'stanford_pos')
    stanford_pos = os.path.abspath(stanford_pos)
    for lang in langs:
        logging.info('loading config for {}'.format(lang))
        router[lang] = {}
        outputs[lang] = set()
        # tokenizer (required)
        tokenizer = config.get(lang, 'tokenizer')
        if tokenizer == 'twokenizer':
            router[lang]['tokenizer'] = twokenize.tokenize
        elif tokenizer == 'apostrophes':
            router[lang]['tokenizer'] = twokenize.tokenize_apostrophes
        else:
            msg = 'No such tokenizer: {}'.format(tokenizer)
            raise KeyError(msg)
        # preprocessor (required)
        preprocessor = config.get(lang, 'preprocessor')
        if preprocessor == 'twokenizer':
            router[lang]['preprocessor'] = twokenize.preprocess
        else:
            msg = 'No such preprocessor: {}'.format(preprocessor)
            raise KeyError(msg)
        # ngrams (optional; default n=3, output disabled)
        n = 3
        try:
            n = config.getint(lang, 'ngrams')
        except Exception:
            # option missing or malformed — keep the default
            pass
        router[lang]['ngrams'] = partial(ngrams, n=n)
        out = False
        try:
            out = config.getboolean(lang, 'ngrams_out')
        except Exception:
            pass
        if out:
            outputs[lang].add('ngrams')
        # normalizer (optional type, defaults to 'basic')
        t = 'basic'
        try:
            t = config.get(lang, 'normalizer_type')
        except Exception:
            pass
        if t == 'basic':
            model = normalize.Normalizer(langmap[lang])
            normalizer = partial(normalize.normalize, model=model)
            router[lang]['normalizer'] = normalizer
        else:
            msg = 'No such normalizer: {}'.format(t)
            raise KeyError(msg)
        out = False
        try:
            out = config.getboolean(lang, 'normalizer_out')
        except Exception:
            pass
        if out:
            outputs[lang].add('normalizer')
        # sentiment (best-effort: any failure just disables the stage)
        try:
            sentiment_model = config.get(lang, 'sentiment_model')
            out = config.getboolean(lang, 'sentiment_out')
            model = sgd.load(sentiment_model)
            classifier = partial(sgd.classify, clf=model)
            if out:
                router[lang]['sentiment'] = classifier
                outputs[lang].add('sentiment')
            else:
                logging.warning('No sentiment classifier for: {}'.format(lang))
        except Exception as ex:
            logging.warning('No sentiment classifier for: {}'.format(lang))
            logging.exception(ex)
        # ner (best-effort, only when a model is configured)
        if config.has_option(lang, 'ner_model'):
            t = 'stanford'
            model = None
            try:
                # Get config variables for NER
                t = config.get(lang, 'ner_type')
                ner_model = config.get(lang, 'ner_model')
                out = config.getboolean(lang, 'ner_out')
                # NER model type switch
                if t == 'stanford':
                    model = seq.load_ner(stanford_ner, ner_model)
                    classifier = partial(seq.ner_tag, model=model)
                else:
                    msg = 'No such NER type: {}'.format(t)
                    raise KeyError(msg)
                # Check output
                if out and model is not None:
                    router[lang]['ner'] = classifier
                    outputs[lang].add('ner')
                else:
                    logging.warning('No NER for: {}'.format(lang))
            except Exception as ex:
                logging.warning('No NER for: {}'.format(lang))
                logging.exception(ex)
        # pos (best-effort, only when a model is configured)
        if config.has_option(lang, 'pos_model'):
            t = 'stanford'
            model = None
            try:
                t = config.get(lang, 'pos_type')
                pos_model = config.get(lang, 'pos_model')
                out = config.getboolean(lang, 'pos_out')
                posmap = config.get(lang, 'pos_map')
                if t == 'stanford':
                    model = seq.load_pos(stanford_pos, pos_model, posmap)
                    classifier = partial(seq.pos_tag, model=model)
                else:
                    # Consistent with the NER branch: an unknown type is
                    # reported via the except handler below.
                    msg = 'No such POS type: {}'.format(t)
                    raise KeyError(msg)
                if out and model is not None:
                    router[lang]['pos'] = classifier
                    outputs[lang].add('pos')
                else:
                    logging.warning('No POS Tagger for: {}'.format(lang))
            except Exception as ex:
                logging.warning('No POS Tagger for: {}'.format(lang))
                logging.exception(ex)
    return router, outputs