Example #1
import os
import sys

import normalize  # assumed: project-local module that provides Normalizer


def main():
    if len(sys.argv) < 2:
        print("Usage: python check.py <plugin_dir>")
        sys.exit(1)

    plugin_dir = sys.argv[1]
    if not os.path.isdir(plugin_dir):
        print("Invalid plugin dir: " + plugin_dir)
        sys.exit(1)

    normalize.Normalizer(plugin_dir, test_only=True).normalize()
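
A minimal entry-point guard (not shown in the original snippet; added here as an assumption so the usage string above matches how the script would be run):

if __name__ == "__main__":
    main()
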
Example #2

# Imports assumed from the MolVS package, which provides these modules.
from molvs import charge, fragment, normalize, tautomer
from molvs.standardize import Standardizer


def standardizeMolVS(inMol):
    # Keep only the largest fragment (drops salts and counter-ions).
    f = fragment.LargestFragmentChooser()
    outMol = f.choose(inMol)
    # Neutralize charges where possible.
    c = charge.Uncharger()
    outMol = c.uncharge(outMol)
    # Apply the MolVS standardization and normalization rules.
    s = Standardizer()
    outMol = s.standardize(outMol)
    n = normalize.Normalizer()
    outMol = n.normalize(outMol)
    # Pick the canonical tautomer.
    t = tautomer.TautomerCanonicalizer()
    outMol = t.canonicalize(outMol)

    # Transform with InChI (alternative round-trip, kept commented out)
    # print("inMol")
    # print(Chem.MolToSmiles(inMol))
    # inchi = Chem.inchi.MolToInchi(inMol)
    # print(inchi)
    # print("outMol")
    # print(Chem.MolToSmiles(outMol))
    # inchi = Chem.inchi.MolToInchi(outMol)
    # print(inchi)
    # outMol = Chem.inchi.MolFromInchi(inchi)

    return outMol
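
A minimal usage sketch (assumes RDKit is installed; the SMILES input is only an illustration):

from rdkit import Chem

mol = Chem.MolFromSmiles("CC(=O)[O-].[Na+]")  # sodium acetate: anion plus counter-ion
std = standardizeMolVS(mol)
print(Chem.MolToSmiles(std))                  # expected: CC(=O)O (neutral acetic acid)
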
Example #3
    # Assumed context: input_matrix, class_label, and create_nn() are defined
    # in the enclosing scope; KFold comes from sklearn.model_selection, np is
    # numpy, and nm is the project-local normalizer module.
    kf = KFold(n_splits=8)
    learning_rate = 1e-2
    batch_size = 30
    max_epoch = 1000
    acceptable_loss = 1e-2

    score_list = []
    highest_score = 0
    model = None
    for train_index, test_index in kf.split(input_matrix):
        input_train, class_train = (input_matrix[train_index],
                                    class_label[train_index])
        input_test, class_test = (input_matrix[test_index],
                                  class_label[test_index])

        # Fit the normalizer on the training fold only, then apply the same
        # transform to both folds so no test statistics leak into training.
        train_normalizer = nm.Normalizer()
        train_normalizer.fit(input_train)
        input_train = train_normalizer.transform(input_train,
                                                 scale_to_range=(-1.5, 1.5))
        input_test = train_normalizer.transform(input_test,
                                                scale_to_range=(-1.5, 1.5))

        nn_layers = create_nn()

        cost = np.inf
        moving_cost = np.inf
        epoch = 0
        cost_list = []
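
The fold-wise pattern above, fitting the normalizer on the training split and reusing it for the test split, can be sketched with scikit-learn's MinMaxScaler standing in for the project's nm.Normalizer (an assumption; the real module is not shown):

import numpy as np
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

X = np.random.rand(100, 4)                        # toy feature matrix
for train_idx, test_idx in KFold(n_splits=8).split(X):
    scaler = MinMaxScaler(feature_range=(-1.5, 1.5))
    X_train = scaler.fit_transform(X[train_idx])  # fit on the training fold only
    X_test = scaler.transform(X[test_idx])        # reuse the fitted scaler
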
Example #4
# Assumed context: the standard-library os and logging modules,
# functools.partial, and the project-level twokenize, ngrams, normalize,
# sgd, and seq helpers.
def create_router(config):
    """Given a config object, return the router and outputs dictionaries."""
    router = {}
    outputs = {}
    sections = config.sections()
    langs = [x for x in sections if x not in ['service', 'external', 'codes']]
    langs = sorted(list(set(langs)))

    langmap = {k: v for k, v in config.items('codes')}

    logging.info('languages in configuration: {}'.format(str(langs)))

    stanford_ner = config.get('external', 'stanford_ner')
    stanford_ner = os.path.abspath(stanford_ner)
    stanford_pos = config.get('external', 'stanford_pos')
    stanford_pos = os.path.abspath(stanford_pos)

    for lang in langs:
        logging.info('loading config for {}'.format(lang))
        router[lang] = {}
        outputs[lang] = set()

        # tokenizer
        tokenizer = config.get(lang, 'tokenizer')
        if tokenizer == 'twokenizer':
            router[lang]['tokenizer'] = twokenize.tokenize
        elif tokenizer == 'apostrophes':
            router[lang]['tokenizer'] = twokenize.tokenize_apostrophes
        else:
            msg = 'No such tokenizer: {}'.format(tokenizer)
            raise KeyError(msg)

        # preprocessor
        preprocessor = config.get(lang, 'preprocessor')
        if preprocessor == 'twokenizer':
            router[lang]['preprocessor'] = twokenize.preprocess
        else:
            msg = 'No such preprocessor: {}'.format(preprocessor)
            raise KeyError(msg)

        # ngrams
        n = 3
        try:
            n = config.getint(lang, 'ngrams')
        except Exception:
            pass
        router[lang]['ngrams'] = partial(ngrams, n=n)

        out = False
        try:
            out = config.getboolean(lang, 'ngrams_out')
        except Exception:
            pass
        if out:
            outputs[lang].add('ngrams')

        # normalizer
        t = 'basic'
        try:
            t = config.get(lang, 'normalizer_type')
        except Exception:
            pass
        if t == 'basic':
            model = normalize.Normalizer(langmap[lang])
            normalizer = partial(normalize.normalize, model=model)
            router[lang]['normalizer'] = normalizer
        else:
            msg = 'No such normalizer: {}'.format(t)
            raise KeyError(msg)
        out = False
        try:
            out = config.getboolean(lang, 'normalizer_out')
        except Exception:
            pass
        if out:
            outputs[lang].add('normalizer')

        # sentiment
        try:
            sentiment_model = config.get(lang, 'sentiment_model')
            out = config.getboolean(lang, 'sentiment_out')
            model = sgd.load(sentiment_model)
            classifier = partial(sgd.classify, clf=model)
            if out:
                router[lang]['sentiment'] = classifier
                outputs[lang].add('sentiment')
            else:
                logging.warning('No sentiment classifier for: {}'.format(lang))
        except Exception as ex:
            logging.warning('No sentiment classifier for: {}'.format(lang))
            logging.exception(ex)

        # ner
        if config.has_option(lang, 'ner_model'):
            t = 'stanford'
            model = None
            try:
                # Get config variables for NER
                t = config.get(lang, 'ner_type')
                ner_model = config.get(lang, 'ner_model')
                out = config.getboolean(lang, 'ner_out')

                # NER model type switch
                if t == 'stanford':
                    model = seq.load_ner(stanford_ner, ner_model)
                    classifier = partial(seq.ner_tag, model=model)
                else:
                    msg = 'No such NER type: {}'.format(t)
                    raise KeyError(msg)

                # Check output
                if out and model is not None:
                    router[lang]['ner'] = classifier
                    outputs[lang].add('ner')
                else:
                    logging.warning('No NER for: {}'.format(lang))

            except Exception as ex:
                logging.warning('No NER for: {}'.format(lang))
                logging.exception(ex)

        # pos
        if config.has_option(lang, 'pos_model'):
            t = 'stanford'
            model = None
            try:
                t = config.get(lang, 'pos_type')
                pos_model = config.get(lang, 'pos_model')
                out = config.getboolean(lang, 'pos_out')
                posmap = config.get(lang, 'pos_map')
                if t == 'stanford':
                    model = seq.load_pos(stanford_pos, pos_model, posmap)
                    classifier = partial(seq.pos_tag, model=model)
                else:
                    msg = 'No such POS type: {}'.format(t)
                    raise KeyError(msg)
                if out and model is not None:
                    router[lang]['pos'] = classifier
                    outputs[lang].add('pos')
                else:
                    logging.warning('No POS Tagger for: {}'.format(lang))
            except Exception as ex:
                logging.warning('No POS Tagger for: {}'.format(lang))
                logging.exception(ex)

    return router, outputs
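
A hedged usage sketch (the file name pipeline.ini and the language key 'en' are illustrative assumptions; the config must contain the 'codes' and 'external' sections the function reads):

import configparser
import logging

logging.basicConfig(level=logging.INFO)

config = configparser.ConfigParser()
config.read('pipeline.ini')                   # hypothetical config path
router, outputs = create_router(config)

tokens = router['en']['tokenizer']('some raw tweet text')  # tokenizer chosen for 'en'
print(outputs['en'])                          # components whose results are emitted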