Example No. 1
def main(args):
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    files = get_files(args.folder)

    for file_path in files:
        file = open(str(file_path), "r")
        raw_content = file.read()
        file.close()
        result_file_path = os.path.join(args.output,
                                        os.path.basename(file_path))
        result_file = open(str(result_file_path), "w")

        sentences = [
            s for s in raw_content.decode('utf-8').strip().split("\n")
            if len(s) > 0 and s != "\n"
        ]

        for sentence in sentences:
            if sentence == graphseg_delimeter:
                result_file.write(wiki_delimiter.encode('utf-8'))
            else:
                result_file.write(sentence.encode('utf-8'))
            result_file.write("\n".encode('utf-8'))
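
Note: most examples in this collection call utils.read_config_file(args.config) and then layer the CLI arguments on top via utils.config.update(args.__dict__); Example No. 30 below imports both names directly. The helper itself is never shown, so the following is only a minimal sketch of what it is assumed to do, using a JSON file as in Example No. 30's default config.json:

import json

# Shared module-level config dict, as assumed by utils.config.update(...) in the examples.
config = {}


def read_config_file(path='config.json'):
    # Assumed behaviour: load the JSON file and merge it into the shared dict.
    with open(path, 'r') as f:
        config.update(json.load(f))
    return config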
Example No. 2
def main(args):
    sys.path.append(str(Path(__file__).parent))

    checkpoint_path = Path(args.checkpoint_dir)
    checkpoint_path.mkdir(exist_ok=True)

    logger = utils.setup_logger(__name__, os.path.join(args.checkpoint_dir, 'train.log'))

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)
    # logger.debug('Running with config %s', utils.config)

    # configure(os.path.join('runs', args.expname))

    word2vec = None

    if not args.infer:
        dataset_path = utils.config['choidataset']
        train_dataset = ChoiDataset(dataset_path, word2vec)
        dev_dataset = ChoiDataset(dataset_path, word2vec)
        test_dataset = ChoiDataset(dataset_path, word2vec)

        train_dl = DataLoader(train_dataset, batch_size=args.bs, collate_fn=collate_fn, shuffle=True,
                              num_workers=args.num_workers)
        dev_dl = DataLoader(dev_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                            num_workers=args.num_workers)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)

    assert bool(args.model) ^ bool(args.load_from)  # exactly one of them must be set

    if args.model:
        model = import_model(args.model)
    elif args.load_from:
        with open(args.load_from, 'rb') as f:
            model = torch.load(f)

    model.train()
    model = maybe_cuda(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    if not args.infer:
        best_val_pk = 1.0
        for j in range(args.epochs):
            train(model, args, j, train_dl, logger, optimizer)
            with (checkpoint_path / 'model{:03d}.t7'.format(j)).open('wb') as f:
                torch.save(model, f)

            val_pk, threshold = validate(model, args, j, dev_dl, logger)
            if val_pk < best_val_pk:
                test_pk = test(model, args, j, test_dl, logger, threshold)
                logger.debug(
                    colored(
                        'Current best model from epoch {} with p_k {} and threshold {}'.format(j, test_pk, threshold),
                        'green'))
                best_val_pk = val_pk
                with (checkpoint_path / 'Meilleur_model.t7'.format(j)).open('wb') as f:
                    torch.save(model, f)
Example No. 3
def main(args):
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    with open(args.model, 'rb') as f:
        model = torch.load(f)
        model.eval()

    segment(args.path, model, word2vec, args.output, wiki=args.wiki)
Example No. 4
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.config = read_config_file()
        self.db = model.get_db(self.config['db_path'])
        self.books = []
        for book in self.db.query(model.Book).all():
            self.books.append(book)

        builder = Gtk.Builder()
        builder.add_from_file('./gui/minerva.glade')
        self.window = builder.get_object('minerva_main')
        self.vbox = builder.get_object('vbox')
        self.tv_filters = builder.get_object('tv_filters')
        self.statusbar = builder.get_object('statusbar')
        self.btn_edit = builder.get_object('btn_edit')
        self.btn_delete = builder.get_object('btn_delete')
        self.books = BookList(parent=self, books=self.books)
        self.filters = Gtk.ListStore(str)
        self.search_entry = Gtk.SearchEntry()

        setup_info_bar(self)
        self.vbox.pack_end(self.info_bar, False, True, 0)

        self._setup_filters(self.filters, self.tv_filters)
        builder.get_object('hpaned').pack2(self.books)
        builder.get_object('hbox').pack_start(self.search_entry, True, True, 0)
        builder.connect_signals(self)

        self.search_entry.connect('search-changed', self.on_search_changed)
        self.window.connect('key-press-event', self.on_key_press_event)
Example No. 5
    def __call__(self, parser, namespace, value, option_string=None):

        config = read_config_file()
        
        format = namespace.format if namespace.format else config.get('DEFAULT', 'format')
        oauth = config.getboolean('DEFAULT', 'oauth')

        # Check if OAuth params are defined
        if oauth:
            oauth = YOAuth(None, None, from_file=config.get('auth','from_file'))

        attr = {
            'community': True,
            'format': format,
            #'jsonCompact': namespace.jsonCompact if namespace.jsonCompact else config.getboolean(format, 'jsonCompact'),
            'debug': namespace.debug if namespace.debug else config.getboolean(format, 'debug'),
            'oauth': oauth
        }

        
        yql = MYQL(**attr)
        yql.diagnostics = namespace.diagnostics if namespace.diagnostics else config.getboolean(format, 'diagnostics')

        for v in value:
            response = yql.rawQuery(v)

            if not response.status_code == 200:
                print(response.content)
                sys.exit(1)

            if format == 'json':
                print(pretty_json(response.content))
            else:
                print(pretty_xml(response.content))
        sys.exit(0)
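
Note: the __call__ above belongs to a custom argparse action, so the YQL queries are executed as soon as the argument is parsed and the process exits from inside the action. A self-contained sketch of that pattern; the class and flag names here are illustrative, not taken from the project:

import argparse
import sys


class RunQueriesAction(argparse.Action):
    # Same pattern as above: do the work inside __call__ and exit instead of returning.
    def __call__(self, parser, namespace, values, option_string=None):
        for query in values:
            print('would execute: ' + query)
        sys.exit(0)


parser = argparse.ArgumentParser()
parser.add_argument('--format', default='json')
parser.add_argument('--debug', action='store_true')
parser.add_argument('--diagnostics', action='store_true')
parser.add_argument('-e', '--execute', nargs='+', action=RunQueriesAction)
parser.parse_args(['--format', 'json', '-e', 'select * from weather.forecast'])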
Example No. 6
def main():

    print "Cancelling orders"
    print "-----------------"

    global config_dict

    inst_token = []
    config_dict = utils.read_config_file()

    access_token, request_token, kite = kite_utils.login_kite(None)
    print kite
    orders = kite.orders()
    print "======================================================="
    for each in orders:
        if each['parent_order_id'] != None:
            print each
            kite.cancel_order(kite.VARIETY_BO,
                              each['order_id'],
                              parent_order_id=each['parent_order_id'])
        else:
            kite.cancel_order(kite.VARIETY_BO,
                              each['order_id'],
                              parent_order_id=None)

    print "======================================================="
Example No. 7
def main(args):
    sys.path.append(str(Path(__file__).parent))

    logger = utils.setup_logger(__name__,  'cross_validate_choi.log')

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)
    logger.debug('Running with config %s', utils.config)

    configure(os.path.join('runs', args.expname))

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None


    dataset_path = Path(args.flat_choi)

    with open(args.load_from, 'rb') as f:
        model = torch.load(f)
    model.eval()
    model = maybe_cuda(model)

    test_accuracy = accuracy.Accuracy()

    for j in range(5):
        validate_folder_numbers = list(range(5))  # list() so remove() also works on Python 3
        validate_folder_numbers.remove(j)
        validate_folder_names = [dataset_path.joinpath(str(num)) for num in validate_folder_numbers]
        dev_dataset = ChoiDataset(dataset_path , word2vec, folder=True, folders_paths=validate_folder_names)
        test_dataset = ChoiDataset(dataset_path, word2vec, folder=True, folders_paths=[dataset_path.joinpath(str(j))])

        dev_dl = DataLoader(dev_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                            num_workers=args.num_workers)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)

        _, threshold = validate(model, args, j, dev_dl, logger)
        test_pk = test(model, args, j, test_dl, logger, threshold, test_accuracy)
        logger.debug(colored('Cross validation section {} with p_k {} and threshold {}'.format(j, test_pk, threshold),'green'))

    cross_validation_pk, _ = test_accuracy.calc_accuracy()
    print('Final cross validation Pk is: ' + str(cross_validation_pk))
    logger.debug(
        colored('Final cross validation Pk is: {}'.format(cross_validation_pk), 'green'))
Example No. 8
def main(dataset):
    file_path = './config/' + dataset + '/AENet.cfg'
    config = read_config_file(file_path)
    create_dir(config['ckpt_dir'])

    print('Test AE-Net | Dataset: {}'.format(dataset))
    print('\nLoading configuration file {}...done'.format(file_path))
    test_aenet_model(config)
Example No. 9
def main(dataset):
    file_path = './config/' + dataset + '/ACRegNet.cfg'
    config = read_config_file(file_path)
    create_dir(config['ckpt_dir'])

    print('Training AC-Regnet | Dataset: {}'.format(dataset))
    print('\nLoading configuration file {}...done'.format(file_path))
    train_acregnet_model(config)
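
Note: both entry points above take only the dataset name and derive the config path from it. Judging by Example No. 24 below, which reads ./config/JSRT/AENet.cfg, a call would look like this (the dataset name is an assumption):

main('JSRT')   # resolves to ./config/JSRT/AENet.cfg or ./config/JSRT/ACRegNet.cfg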
Example No. 10
def make_prediction(input_data):

    # load pipeline and make predictions
    config = ut.read_config_file('config.yaml')
    _titanic_pipe = joblib.load(
        filename=config[0]['Paths'].get('output_model_path'))
    # return predictions
    results = _titanic_pipe.predict(input_data)
    return results
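
Note: make_prediction expects input_data in the same tabular form the pipeline was trained on (see Examples No. 17 and No. 21). A hedged usage sketch; the column names below are illustrative only, the real ones come from data_columns in config.yaml:

import pandas as pd

# Hypothetical single-passenger frame; real column names are defined in the project's config.
sample = pd.DataFrame([{
    'pclass': 3, 'sex': 'male', 'age': 29.0, 'sibsp': 0,
    'parch': 0, 'fare': 7.25, 'cabin': 'n', 'embarked': 'S', 'title': 'Mr',
}])
print(make_prediction(sample))   # e.g. array([0])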
Example No. 11
def list_deployed_models_api() -> str:
    api_token = connexion.request.form["api_token"]
    log("[INFO] Calling list_models_api", api_token)
    if auth_token(api_token):
        models_config_list = utils.read_config_file()
        log("[INFO] List of deployed models:\n" + str(models_config_list), api_token)
        return str(models_config_list)
    else:
        return log("[INFO] Invalid token", api_token)
Example No. 12
def main(args):
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    with Path(args.file).open('r') as f:
        file_names = f.read().strip().split('\n')

    word2vec = None

    with open(args.model, 'rb') as f:
        #model = torch.load(f)
        #for run in cpu
        model = torch.load(f, map_location='cpu')

        model.eval()

    for name in file_names:
        if name:
            segment(Path(name), model, word2vec, args.output)
Example No. 13
def main(args):
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    algo_delimeter = graphseg_delimeter

    files = get_files(args.folder)
    acc = accuracy.Accuracy()

    for file_path in files:
        file = open(str(file_path), "r")
        raw_content = file.read()
        file.close()
        sentences = [
            s for s in raw_content.decode('utf-8').strip().split("\n")
            if len(s) > 0 and s != "\n"
        ]
        sentences_length = []
        h = []
        t = []
        is_first_sentence = True
        for sentence in sentences:
            if sentence == truth:
                if not is_first_sentence:
                    t[-1] = 1
                continue
            if sentence == algo_delimeter:
                if not is_first_sentence:
                    h[-1] = 1
                continue
            words = extract_sentence_words(sentence)
            sentences_length.append(len(words))
            t.append(0)
            h.append(0)
            is_first_sentence = False
        t[-1] = 1  # end of last segment
        h[-1] = 1  # they already segment it correctly.

        acc.update(h, t)

    calculated_pk, calculated_windiff = acc.calc_accuracy()
    print 'Pk: {:.4}.'.format(calculated_pk)
    print 'Win_diff: {:.4}.'.format(calculated_windiff)
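
Note: the Pk and WinDiff figures printed here come from accuracy.Accuracy(), which is not part of this listing. For reference, a minimal sketch of the Pk metric over 0/1 boundary vectors like the h and t built above (a simplification; the project's implementation may differ):

def pk(h, t, k=None):
    # h, t: 0/1 lists where 1 means "this sentence ends a segment".
    if k is None:
        # conventional window size: half the average reference segment length
        k = max(1, int(round(len(t) / (2.0 * max(1, sum(t))))))
    if len(t) <= k:
        return 0.0
    errors = 0
    for i in range(len(t) - k):
        same_in_hyp = sum(h[i:i + k]) == 0   # no boundary between sentences i and i+k
        same_in_ref = sum(t[i:i + k]) == 0
        errors += int(same_in_hyp != same_in_ref)
    return errors / float(len(t) - k)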
Example No. 14
def main(args):
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    with Path(args.file).open('r') as f:
        file_names = f.read().strip().split('\n')

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    with open(args.model, 'rb') as f:
        model = torch.load(f)
        model.eval()


    for name in file_names:
        if name:
            segment(Path(name), model, word2vec, args.output, wiki=args.wiki)
Example No. 15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', dest='config_file',
                        default='./settings')
    parser.add_argument('-i', '--index', dest='xen_index',
                        default=XEN_INDEX)
    args = parser.parse_args()

    config = read_config_file(args.config_file)

    cursor = connect_to_mysql(**config['mysql'])

    dfs = load_dataframes(cursor)
    data = calculate(dfs)

    es_conn = connect_to_elasticsearch(**config['elasticsearch'])
    write_to_elasticsearch(es_conn, data, args.xen_index)
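
Note: here read_config_file(args.config_file) has to return a nested mapping whose 'mysql' and 'elasticsearch' sections are passed straight to the connectors as keyword arguments. The shape this implies, with illustrative values (only the two section names are actually implied by the code; the key names are assumptions):

config = {
    'mysql': {'host': 'localhost', 'port': 3306, 'user': 'xen',
              'password': 'secret', 'database': 'xen'},
    'elasticsearch': {'host': 'localhost', 'port': 9200},
}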
Example No. 16
def login_kite(request_token):

    global config_dict

    # read config file
    config_dict = utils.read_config_file()

    #kite = kite_utils.kite_login(request_token)
    config = ConfigParser.ConfigParser()
    config.read(config_dict['data_access'])
    access_token = config.get('MAIN', 'DATA_ACCESS_TOKEN')

    my_api = str(config_dict['kite_api_key'])
    my_api_secret = str(config_dict['kite_api_secret'])

    kite = KiteConnect(api_key=my_api)
    url = kite.login_url()
    print url
    # Redirect the user to the login url obtained
    # from kite.login_url(), and receive the request_token
    # from the registered redirect url after the login flow.
    # Once you have the request_token, obtain the access_token
    # as follows.
    # sys.argv[1] is access token that we get from login
    if request_token == None:
        kite.set_access_token(access_token)
    else:
        data = kite.generate_session(request_token, api_secret=my_api_secret)
        kite.set_access_token(data["access_token"])
        access_token = data["access_token"]
        config.set('MAIN', 'DATA_ACCESS_TOKEN', data["access_token"])

        with open(config_dict['data_access'], 'wb') as configfile:
            config.write(configfile)

    return my_api, access_token, kite
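
Note: login_kite returns the API key, the current access token and a ready KiteConnect client. A hedged usage sketch of the two flows it supports; kite.profile() is only a smoke test, and the request-token placeholder has to come from the Kite login redirect:

# Reuse the access token already stored in the config file:
api_key, access_token, kite = login_kite(None)
print(kite.profile())

# First login: pass the request_token obtained from the redirect URL;
# this also refreshes the stored DATA_ACCESS_TOKEN.
# api_key, access_token, kite = login_kite('<request_token>')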
Example No. 17
def run_training():
    """Train the model."""
    # read training data
    config = ut.read_config_file('config.yaml')
    path = config[0]['Paths'].get('directory')
    filename = config[0]['Paths'].get('data_filename')
    extension = config[0]['Paths'].get('data_extension')
    cols = config[2]['Feature_Groups'].get('data_columns')
    target = config[2]['Feature_Groups'].get('target')
    data = ut.load_data(path=path,
                        filename=filename,
                        extension=extension,
                        cols=cols)
    # divide train and test
    data[target] = data[target].astype(int)
    X_train, X_test, y_train, y_test = train_test_split(data.drop(target,
                                                                  axis=1),
                                                        data[target],
                                                        test_size=0.2,
                                                        random_state=0)
    # fit pipeline
    titanic_pipe.fit(X_train, y_train)
    # save pipeline
    joblib.dump(titanic_pipe, config[0]['Paths'].get('output_model_path'))
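
Note: config.yaml is indexed positionally in these examples (config[0]['Paths'], config[2]['Feature_Groups']), so ut.read_config_file presumably returns a list of single-key mappings. A sketch of the assumed structure; the keys are the ones referenced in Examples No. 10, 17 and 21, the values are illustrative:

config = [
    {'Paths': {'directory': 'data',
               'data_filename': 'titanic',
               'data_extension': '.csv',
               'output_model_path': 'titanic_pipe.joblib'}},
    {},  # config[1] is not referenced in these examples
    {'Feature_Groups': {'data_columns': ['pclass', 'sex', 'age', 'fare'],  # illustrative
                        'categorical_vars': ['sex', 'cabin', 'embarked', 'title'],
                        'numerical_to_impute': ['age'],
                        'target': 'survived'}},
]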
Example No. 18
def main(args):
    sys.path.append(str(Path(__file__).parent))

    checkpoint_path = Path(args.checkpoint_dir)
    checkpoint_path.mkdir(exist_ok=True)

    logger = utils.setup_logger(__name__, os.path.join(args.checkpoint_dir, 'train.log'))

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)
    logger.debug('Running with config %s', utils.config)

    configure(os.path.join('runs', args.expname))

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    if not args.infer:
        if args.wiki:
            dataset_path = Path(utils.config['wikidataset'])
            train_dataset = WikipediaDataSet(dataset_path / 'train', word2vec=word2vec,
                                             high_granularity=args.high_granularity)
            dev_dataset = WikipediaDataSet(dataset_path / 'dev', word2vec=word2vec, high_granularity=args.high_granularity)
            test_dataset = WikipediaDataSet(dataset_path / 'test', word2vec=word2vec,
                                            high_granularity=args.high_granularity)

        else:
            dataset_path = Path(utils.config['choidataset'])
            train_dataset = ChoiDataset(dataset_path / 'train', word2vec)
            dev_dataset = ChoiDataset(dataset_path / 'dev', word2vec)
            test_dataset = ChoiDataset(dataset_path / 'test', word2vec)

        train_dl = DataLoader(train_dataset, batch_size=args.bs, collate_fn=collate_fn, shuffle=True,
                              num_workers=args.num_workers)
        dev_dl = DataLoader(dev_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                            num_workers=args.num_workers)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)

    assert bool(args.model) ^ bool(args.load_from)  # exactly one of them must be set

    if args.model:
        model = import_model(args.model)
    elif args.load_from:
        with open(args.load_from, 'rb') as f:
            model = torch.load(f)

    model.train()
    model = maybe_cuda(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    if not args.infer:
        best_val_pk = 1.0
        for j in range(args.epochs):
            train(model, args, j, train_dl, logger, optimizer)
            with (checkpoint_path / 'model{:03d}.t7'.format(j)).open('wb') as f:
                torch.save(model, f)

            val_pk, threshold = validate(model, args, j, dev_dl, logger)

            test_pk = test(model, args, j, test_dl, logger, threshold)
            logger.debug(
                colored(
                    'Current best model from epoch {} with p_k {} and threshold {}'.format(j, test_pk, threshold),
                    'green'))
            best_val_pk = val_pk

    else:
        test_dataset = WikipediaDataSet(args.infer, word2vec=word2vec,
                                        high_granularity=args.high_granularity)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)
        print(test(model, args, 0, test_dl, logger, 0.4))
Example No. 19
import json

import orion
import cygnus

from utils import read_config_file

config = read_config_file('config.ini')
with open(config.get('device_schema_path')) as json_file:
    data = json.load(json_file)

orion.init(config.get('orion_host'), config.get('orion_port'))

orion.register_entity(data, config.get('device_type'), config.get('device_id'), '0.0.0.0:4000')
orion.get_entities_by_type(config.get('device_type'))
#orion.get_entities_by_id(config.get('device_id'))
#orion.update_context(config.get('device_id'),
#                     config.get('device_type'), "true")
#orion.get_entities_by_id(config.get('device_id'))


#cygnus.init(config.get('cygnus_host'), config.get('cygnus_port'))
#cygnus.subscribe_attributes_change(config.get('device_type'), config.get('device_id'), ["status"], config.get('')
Example No. 20
def main(args):
    sys.path.append(str(Path(__file__).parent))

    checkpoint_path = Path(args.checkpoint_dir)
    checkpoint_path.mkdir(exist_ok=True)

    logger = utils.setup_logger(__name__, os.path.join(args.checkpoint_dir, 'train.log'))

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)
    logger.debug('Running with config %s', utils.config)

    configure(os.path.join('runs', args.expname))

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    # read the pretrained BERT sentence embeddings...
    
    bound1 = 14900; bound2 = 2135; bound3 = 50; bound4 = 100; bound5 = 117; bound6 = 227;
    bert_vec = []

    bert_vec = read_sent_bert(bert_vec, '/ubc/cs/research/nlp/Linzi/seg/bert/bert_emb_train_cleaned.txt')
    bert_vec = read_sent_bert(bert_vec, '/ubc/cs/research/nlp/Linzi/seg/bert/bert_emb_dev_cleaned.txt')
    bert_vec = read_sent_bert(bert_vec, '/ubc/cs/research/nlp/Linzi/seg/bert/test1_data_emb_cleaned.txt')
    bert_vec = read_sent_bert(bert_vec, '/ubc/cs/research/nlp/Linzi/seg/bert/bert_emb_test_2.txt')
    bert_vec = read_sent_bert(bert_vec, '/ubc/cs/research/nlp/Linzi/seg/bert/bert_emb_test_3.txt')
    bert_vec = read_sent_bert(bert_vec, '/ubc/cs/research/nlp/Linzi/seg/bert/bert_emb_test_4.txt')
    bert_vec = read_sent_bert(bert_vec, '/ubc/cs/research/nlp/Linzi/seg/bert/bert_emb_test_wiki.txt')
    train_bert = bert_vec[0:bound1]
    dev_bert = bert_vec[bound1:(bound1+bound2)]
    test_bert = bert_vec[(bound1+bound2):(bound1+bound2+bound3)]
    test2_bert = bert_vec[(bound1+bound2+bound3):(bound1+bound2+bound3+bound4)]
    test3_bert = bert_vec[(bound1+bound2+bound3+bound4):(bound1+bound2+bound3+bound4+bound5)]
    test4_bert = bert_vec[(bound1+bound2+bound3+bound4+bound5):(bound1+bound2+bound3+bound4+bound5+bound6)]
    test5_bert = bert_vec[(bound1+bound2+bound3+bound4+bound5+bound6):]

    
    if not args.infer:
        if args.wiki:
            dataset_path = Path(utils.config['wikidataset'])
            train_dataset = WikipediaDataSet(dataset_path / 'train', word2vec=word2vec, high_granularity=args.high_granularity, sent_bert=train_bert)
            dev_dataset = WikipediaDataSet(dataset_path / 'dev', word2vec=word2vec, high_granularity=args.high_granularity, sent_bert=dev_bert)
            test_dataset = WikipediaDataSet(dataset_path / 'test', word2vec=word2vec, high_granularity=args.high_granularity, sent_bert=test_bert)
            test_dataset_2 = WikipediaDataSet(dataset_path / 'test_cities', word2vec=word2vec, high_granularity=args.high_granularity, sent_bert=test2_bert)
            test_dataset_3 = WikipediaDataSet(dataset_path / 'test_elements', word2vec=word2vec, high_granularity=args.high_granularity, sent_bert=test3_bert)
            test_dataset_4 = WikipediaDataSet(dataset_path / 'test_clinical', word2vec=word2vec, high_granularity=args.high_granularity, sent_bert=test4_bert)
            test_dataset_5 = WikipediaDataSet(dataset_path / 'test_wiki', word2vec=word2vec, high_granularity=args.high_granularity, sent_bert=test5_bert)

        else:
            dataset_path = Path(utils.config['choidataset'])
            train_dataset = ChoiDataset(dataset_path / 'train', word2vec)
            dev_dataset = ChoiDataset(dataset_path / 'dev', word2vec)
            test_dataset = ChoiDataset(dataset_path / 'test', word2vec)



        train_dl = DataLoader(train_dataset, batch_size=args.bs, collate_fn=collate_fn, shuffle=True,
                              num_workers=args.num_workers)
        dev_dl = DataLoader(dev_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                            num_workers=args.num_workers)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)
        test_dl_2 = DataLoader(test_dataset_2, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)
        test_dl_3 = DataLoader(test_dataset_3, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)
        test_dl_4 = DataLoader(test_dataset_4, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)
        test_dl_5 = DataLoader(test_dataset_5, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)

    assert bool(args.model) ^ bool(args.load_from)  # exactly one of them must be set

    if args.model:
        model = import_model(args.model)
    elif args.load_from:
        with open(args.load_from, 'rb') as f:
            model = torch.load(f)

    model.train()
    model = maybe_cuda(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    if not args.infer:
        best_val_pk = 1.0
        for j in range(args.epochs):
            train(model, args, j, train_dl, logger, optimizer)

            val_pk, threshold = validate(model, args, j, dev_dl, logger)
            test_pk = test(model, args, j, test_dl, logger, threshold)
            test_pk2 = test(model, args, j, test_dl_2, logger, threshold)
            test_pk3 = test(model, args, j, test_dl_3, logger, threshold)
            test_pk4 = test(model, args, j, test_dl_4, logger, threshold)
            test_pk5 = test(model, args, j, test_dl_5, logger, threshold)
            if val_pk < best_val_pk:
                logger.debug(
                    colored(
                        'Current best model from epoch {} with p_k {} and threshold {}'.format(j, test_pk, threshold),
                        'green'))
                best_val_pk = val_pk

                with (checkpoint_path / 'best_model_transformer.t7'.format(j)).open('wb') as f:
                    torch.save(model, f)


    else:
        test_dataset = WikipediaDataSet(args.infer, word2vec=word2vec,
                                        high_granularity=args.high_granularity)
        test_dl = DataLoader(test_dataset, batch_size=args.test_bs, collate_fn=collate_fn, shuffle=False,
                             num_workers=args.num_workers)
        print(test(model, args, 0, test_dl, logger, 0.4))
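
Note: read_sent_bert is not included in this listing; from its use above it appears to append one pretrained sentence embedding per non-empty line of the given file. A minimal sketch under that assumption (the real parser, and the file format, may differ):

import numpy as np


def read_sent_bert(vectors, path):
    # Assumed format: one whitespace-separated embedding per line.
    with open(path) as f:
        for line in f:
            line = line.strip()
            if line:
                vectors.append(np.array([float(x) for x in line.split()]))
    return vectors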
Example No. 21
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import preprocessors as pp
import utils as ut

config = ut.read_config_file('config.yaml')

titanic_pipe = Pipeline(
    # complete with the list of steps from the preprocessors file
    # and the list of variables from the config
    [('categorical_imputer',
      pp.CategoricalImputer(
          variables=config[2]['Feature_Groups'].get('categorical_vars'))),
     ('missing_indicator',
      pp.MissingIndicator(
          variables=config[2]['Feature_Groups'].get('numerical_to_impute'))),
     ('numerical_imputer',
      pp.NumericalImputer(
          variables=config[2]['Feature_Groups'].get('numerical_to_impute'))),
     ('cabin_variable',
      pp.ExtractFirstLetter(
          variables=config[2]['Feature_Groups'].get('categorical_vars')[1])),
     ('rare_label_encoder',
      pp.RareLabelCategoricalEncoder(
          tol=0.05,
          variables=config[2]['Feature_Groups'].get('categorical_vars'))),
     ('categorical_encoder',
      pp.CategoricalEncoder(
          variables=config[2]['Feature_Groups'].get('categorical_vars'))),
     # The listing is cut off at this point; given the StandardScaler and
     # LogisticRegression imports above, the remaining steps are presumably:
     ('scaler', StandardScaler()),
     ('linear_model', LogisticRegression())])
Example No. 22
def main():
    print "Executing CBO Algo for Equities"
    print "-------------------------------"
    global kite
    global fno_dict, base_dict, config_dict, orders
    global scrip_map, sub_list
    global order_dict
    inst_token = []

    #TODO: Add argparser for validating input
    if len(sys.argv) < NO_OF_PARAMS:
        print "Invalid number of params"
        #return

    # read config file
    config_dict = utils.read_config_file()
    
    # get list of fno
    fno_dict = utils.get_fno_dict()

    # get yesterdays high low
    base_dict = get_yesterdays_ohlc(sys.argv[1])
   
    #get kite object
    api_key, access_token, kite = kite_utils.login_kite(None)

    # get instrument list, create quote subscription list and 
    # mapping between instrument token and tradingsymbol
    quote_list = []
    data = kite.instruments("NSE")
    for each in fno_dict:
        for instrument in data:
            if each == instrument['tradingsymbol']:
                entry = "NSE:" + str(instrument['tradingsymbol'])
                quote_list.append(entry)
                # sub list for subscribing to the quotes
                sub_list.append(int(instrument['instrument_token']))
                #mapping dictionary for token and trading symbol
                scrip_map[int(instrument['instrument_token'])] = str(instrument['tradingsymbol'])
    
    print scrip_map
    # open file to write buy/sell orders
    fp = open(config_dict['cbo_seed_file'], "w")
  
    # write header
    utils.write_header(fp, "CBO")

    # Generate order file
    count = int(0)
    quotes = kite.quote(quote_list)
    for each in quotes:
        scrip = each.split(":")[1].strip("\n")
        if scrip not in base_dict:
            continue
        if float(quotes[each]["ohlc"]["open"]) < float(config_dict['start_price']):
            continue
        
        if float(quotes[each]["ohlc"]["open"]) > float(config_dict['end_price']):
            continue
        count = int(count) + int(1);
        buy, sell = generate_orders(scrip, base_dict[scrip], quotes[each]['ohlc']['open'])
        if (buy != None):
            fp.write(buy)
        if (sell != None):
            fp.write(sell)
    fp.close()

    # create dictionary for active orders

    curr_order = kite.orders()
    print "------------------------------------------------"
    print curr_order
    print "------------------------------------------------"


    # push all the orders
    order_list = []
    order_dict = {}
    fp = open(config_dict['cbo_seed_file'])
    for each in fp:
        #ignore line starting with #
        if each.startswith("#"):
            continue
        each = each.rstrip()
        line = each.split(" ")
        scrip = line[SCRIP_ID]
        action = line[ACTION_ID]
        price = line[PRICE_ID]
        t_price = line[TRIGGER_ID]
        target = line[TARGET_ID]
        stoploss = line[STOPLOSS_ID]
        live_price = line[LIVE_PRICE_ID]

        if line[SCRIP_ID] not in order_dict:
            order_dict[scrip] = {}
            order_dict[scrip][action] = {}
        else:
            order_dict[scrip][action] = {}

        order_dict[scrip][action]['price'] = price
        order_dict[scrip][action]['trigger_price'] = t_price
        order_dict[scrip][action]['target'] = target
        order_dict[scrip][action]['stoploss'] = stoploss
        order_dict[scrip][action]['flag'] = 0
        order_dict[scrip][action]['live_price'] = live_price
        
    fp.close()
    
    print "----------------------------------------------------------------"
    print order_dict
    print "----------------- End of order list ----------------------------"
  
    kws = KiteTicker(api_key, access_token, debug=False)
    kws.on_ticks = on_ticks
    kws.on_connect = on_connect
    kws.on_close = on_close
    kws.on_error = on_error
    kws.on_noreconnect = on_noreconnect
    kws.on_reconnect = on_reconnect
    kws.on_order_update = on_order_update
    kws.connect()
Example No. 23
def main(args):
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    file_path = args.input
    output_folder_path = args.output
    special_delim_sign_path = args.sign

    file = open(str(special_delim_sign_path), "r")
    special_delim_sign = file.read().encode('utf-8').split("\n")[0]
    file.close()

    file = open(str(file_path), "r")
    raw_content = file.read()
    file.close()

    result_file_path = None

    sentences = [
        s for s in raw_content.decode('utf-8').strip().split("\n")
        if len(s) > 0 and s != "\n"
    ]

    last_doc_id = 0
    last_topic = ""

    for sentence in sentences:

        first_comma_index = sentence.index(',')
        second_comma_index = sentence[first_comma_index + 1:].index(',')
        current_doc_id = sentence[0:first_comma_index]
        sign_index = sentence.index(special_delim_sign)
        start_sentence_index = sign_index + 1
        actual_sentence = sentence[start_sentence_index:]
        current_topic = sentence[first_comma_index + second_comma_index +
                                 2:sign_index]

        if (current_doc_id != last_doc_id):
            last_doc_id = current_doc_id
            print 'new file index'
            print last_doc_id
            if (result_file_path != None):
                result_file.close()
            result_file_path = os.path.join(output_folder_path,
                                            str(current_doc_id) + ".text")

            result_file = open(str(result_file_path), "w")
            last_topic = ""

        if (current_topic != last_topic):
            last_topic = current_topic
            level = 1 if (current_topic == "TOP-LEVEL SEGMENT") else 2
            result_file.write(
                (wiki_utils.get_segment_seperator(level, current_topic) +
                 ".").encode('utf-8'))
            result_file.write("\n".encode('utf-8'))

        if ('\n' in sentence):
            print 'back slash in sentence'
        result_file.write(actual_sentence.encode('utf-8'))
        #result_file.write(".".encode('utf-8'))
        result_file.write("\n".encode('utf-8'))
Example No. 24
from data import DataHandler
from models import AENet
import tensorflow as tf
from utils import read_config_file, create_dir


def test_aenet_model(config):
    tf.reset_default_graph()
    sess = tf.Session()

    test_lbs, _ = DataHandler.load_labels(config['test_lbs_file'])
    print('Loading test data...done')

    config['batch_size'] = test_lbs.shape[0]

    aenet = AENet(sess, config, 'AENet', is_train=False)
    print('Building AENet model...done')
    aenet.restore(config['ckpt_dir'])
    print('Loading trained AENet model...done')

    print('Testing...')
    aenet.deploy(config['result_dir'], test_lbs)

    print('Testing...done')


if __name__ == "__main__":
    config = read_config_file('./config/JSRT/AENet.cfg')
    create_dir(config['result_dir'])
    test_aenet_model(config)
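
Note: tf.reset_default_graph() and tf.Session() are TensorFlow 1.x APIs. To run this style of code under TensorFlow 2.x, the standard compatibility shim is:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # restores the TF1 graph/session behaviour this example relies on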
Example No. 25
import wutils


# By default, all modules will be enabled, examples will be disabled,
# and tests will be disabled.
modules_enabled  = ['all_modules']
examples_enabled = False
tests_enabled    = False

# Bug 1868:  be conservative about -Wstrict-overflow for optimized builds
# on older compilers; it can generate spurious warnings.  
cc_version_warn_strict_overflow = ('4', '8', '2')

# Get the information out of the NS-3 configuration file.
config_file_exists = False
(config_file_exists, modules_enabled, examples_enabled, tests_enabled) = read_config_file()

sys.path.insert(0, os.path.abspath('waf-tools'))
try:
    import cflags # override the build profiles from waf
finally:
    sys.path.pop(0)

cflags.profiles = {
	# profile name: [optimization_level, warnings_level, debug_level]
	'debug':     [0, 2, 3],
	'optimized': [3, 2, 1],
	'release':   [3, 2, 0],
	}
cflags.default_profile = 'debug'
Example No. 26
def main():
    print "Executing CBO Algo for Equities"
    print "-------------------------------"

    global fno_dict, base_dict, config_dict, orders

    inst_token = []

    #TODO: Add argparser for validating input
    if len(sys.argv) < NO_OF_PARAMS:
        print "Invalid number of params"
        #return

    # read config file
    config_dict = utils.read_config_file()
    
    # get list of fno
    fno_dict = utils.get_fno_dict()

    # get yesterdays high low
    base_dict = get_yesterdays_ohlc(sys.argv[1])
    
    '''
    #simulate(sys.argv[2])
    #return 
    #open kite connection 
    if len(sys.argv) == int(NO_OF_PARAMS) + int(1):
        request_token = sys.argv[2]
    else:
        request_token = None
    
    #kite = kite_utils.kite_login(request_token)
    config = ConfigParser.ConfigParser()
    config.read(config_dict['data_access'])
    access_token = config.get('MAIN','DATA_ACCESS_TOKEN')
   
    #my_api = "yvyxm4vynkq1pj8q"
    #my_api_secret = "53ekyylrx3orbb85l8isj4o291o22g31" 
    
    my_api = str(config_dict['kite_api_key'])
    my_api_secret = str(config_dict['kite_api_secret'])
    
    kite = KiteConnect(api_key=my_api)
    url = kite.login_url()
    # Redirect the user to the login url obtained
    # from kite.login_url(), and receive the request_token
    # from the registered redirect url after the login flow.
    # Once you have the request_token, obtain the access_token
    # as follows.
    # sys.argv[1] is access token that we get from login
    if request_token == None:
        kite.set_access_token(access_token)
    else:
        data = kite.generate_session(request_token, api_secret=my_api_secret)
        kite.set_access_token(data["access_token"])
        access_token = data["access_token"]
        config.set('MAIN','DATA_ACCESS_TOKEN', data["access_token"])

        with open(config_dict['data_access'], 'wb') as configfile:
            config.write(configfile)
    print kite
    '''

    kite = kite_utils.login_kite(None)

    # get instrument list
    quote_list = []
    data = kite.instruments("NSE")
    for each in fno_dict:
        for instrument in data:
            if each == instrument['tradingsymbol']:
                entry = "NSE:" + str(instrument['tradingsymbol'])
                quote_list.append(entry)
    
    # open file to write buy/sell orders
    fp = open(config_dict['cbo_seed_file'], "w")
   
    # write header
    outstring = "########################################################################################\n"
    fp.write(outstring)
    outstring = "# CBO file generated for " + time.strftime("%c") +"\n"
    fp.write(outstring)
    outstring = "########################################################################################\n"
    fp.write(outstring)

    count = int(0)
    quotes = kite.quote(quote_list)
    for each in quotes:
        scrip = each.split(":")[1].strip("\n")
        if float(quotes[each]["ohlc"]["open"]) < float(config_dict['start_price']):
            continue
        
        if float(quotes[each]["ohlc"]["open"]) > float(config_dict['end_price']):
            continue

        count = int(count) + int(1);
        buy, sell = generate_orders(scrip, base_dict[scrip], quotes[each]['ohlc']['open'])
        if (buy != None):
            fp.write(buy)
        if (sell != None):
            fp.write(sell)
    fp.close()
    
    # push all the orders
    order_list = []
    fp = open(config_dict['cbo_seed_file'])
    for each in fp:
        #ignore line starting with #
        if each.startswith("#"):
            continue
        each = each.rstrip()
        order_list.append(each.split(" "))
    fp.close()
    print "----------------------------------------------------------------"
    print order_list
    print "----------------- End of order list ----------------------------"
    
    for each in order_list:
        try:
            print each
            order_id = kite.place_order(
                    tradingsymbol=str(each[SCRIP_ID]),
                    exchange="NSE", 
                    transaction_type=str(each[ACTION_ID]),
                    quantity=1,
                    order_type="SL",
                    product="BO",
                    price = float(each[PRICE_ID]),
                    trigger_price = float(each[TRIGGER_ID]),
                    squareoff = float(each[TARGET_ID]),
                    stoploss = float(each[STOPLOSS_ID]),
                    variety = "bo",
                    validity = "DAY"
                    )

            logging.info("Order placed. ID is: {}".format(order_id))
        except Exception as e:
            logging.info("Order placement failed: {}".format(e.message))
Example No. 27
                self._error_count = error_count
            self._number_failed += no_msgs
            if self.config['input']['type'] == 'tailer':
                self.input.fail(**args)
        else:
            error_count = 0
            self._number_collected += no_msgs
            if self.config['input']['type'] == 'tailer':
                self.input.success(**args)
        self._error_count = error_count

        self._output.close()

    def generate_package(self, data):
        """Parse raw data and package the result in required format"""
        if self.parser:
            data = self.parser.parse(data)
            log.debug("Parser %s parsed data %s: ",
                      self.parser.__class__.__name__, data)

        log.debug("Data to be packaged: %s", data)
        return generate_payload(data, self.metadata)


if __name__ == "__main__":
    # A demo code to show how Collector is used
    config = read_config_file()
    log = create_logger(__name__, config)
    slurm_collector = Collector(config)
    slurm_collector.collect()
Example No. 28
from flask import Flask, Request
from utils import setup_logger, read_config_file
import argparse  # needed for the argument parser below
import os
import rollbar
import rollbar.contrib.flask
from flask import got_request_exception
from celery import Celery
import __builtin__

# Argument parser
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-c")
args = arg_parser.parse_args()

# Config
cfg = read_config_file(args.c)

# Create App
app = Flask(__name__, static_url_path='')
app.secret_key = SECRET_KEY
app.permanent_session_lifetime = SESSION_DURATION
documentor = Autodoc(app)
app.config['MONGODB_SETTINGS'] = {
    'db': cfg["database"]["name"],
    'host': cfg["database"]["host"],
    'port': cfg["database"]["port"]
}

# Celery
app.config['CELERY_BROKER_URL'] = cfg['redis']['url']
app.config['CELERY_RESULT_BACKEND'] = cfg['redis']['url']
Example No. 29
def main(args):
    start = timer()

    sys.path.append(str(Path(__file__).parent))

    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    logger.debug('Running with config %s', utils.config)
    print('Running with threshold: ' + str(args.seg_threshold))
    preds_stats = utils.predictions_analysis()

    if not args.test:
        word2vec = gensim.models.KeyedVectors.load_word2vec_format(
            utils.config['word2vecfile'], binary=True)
    else:
        word2vec = None

    word2vec_done = timer()
    print 'Loading word2vec elapsed: ' + str(word2vec_done -
                                              start) + ' seconds'
    dirname = 'test'

    if args.wiki:
        dataset_folders = [Path(utils.config['wikidataset']) / dirname]
        if (args.wiki_folder):
            dataset_folders = []
            dataset_folders.append(args.wiki_folder)
        print 'running on wikipedia'
    else:
        if (args.bySegLength):
            dataset_folders = getSegmentsFolders(utils.config['choidataset'])
            print 'run on choi by segments length'
        else:
            dataset_folders = [utils.config['choidataset']]
            print 'running on Choi'

    with open(args.model, 'rb') as f:
        model = torch.load(f)

    model = maybe_cuda(model)
    model.eval()

    if (args.naive):
        model = naive.create()

    for dataset_path in dataset_folders:

        if (args.bySegLength):
            print 'Segment is ', os.path.basename(dataset_path), " :"

        if args.wiki:
            if (args.wiki_folder):
                dataset = WikipediaDataSet(dataset_path,
                                           word2vec,
                                           folder=True,
                                           high_granularity=False)
            else:
                dataset = WikipediaDataSet(dataset_path,
                                           word2vec,
                                           high_granularity=False)
        else:
            dataset = ChoiDataset(dataset_path, word2vec)

        dl = DataLoader(dataset,
                        batch_size=args.bs,
                        collate_fn=collate_fn,
                        shuffle=False)

        with tqdm(desc='Testing', total=len(dl)) as pbar:
            total_accurate = 0
            total_count = 0
            total_loss = 0
            acc = accuracy.Accuracy()

            for i, (data, targets, paths) in enumerate(dl):
                if i == args.stop_after:
                    break

                pbar.update()
                output = model(data)
                targets_var = Variable(maybe_cuda(torch.cat(targets, 0),
                                                  args.cuda),
                                       requires_grad=False)
                batch_loss = 0
                output_prob = softmax(output.data.cpu().numpy())
                output_seg = output_prob[:, 1] > args.seg_threshold
                target_seg = targets_var.data.cpu().numpy()
                batch_accurate = (output_seg == target_seg).sum()
                total_accurate += batch_accurate
                total_count += len(target_seg)
                total_loss += batch_loss
                preds_stats.add(output_seg, target_seg)

                current_target_idx = 0
                for k, t in enumerate(targets):
                    document_sentence_count = len(t)
                    sentences_length = [s.size()[0] for s in data[k]
                                        ] if args.calc_word else None
                    to_idx = int(current_target_idx + document_sentence_count)
                    h = output_seg[current_target_idx:to_idx]

                    # hypothesis and targets are missing classification of last sentence, and therefore we will add
                    # 1 for both
                    h = np.append(h, [1])
                    t = np.append(t.cpu().numpy(), [1])

                    acc.update(h, t, sentences_length=sentences_length)

                    current_target_idx = to_idx

                logger.debug('Batch %s - error %7.4f, Accuracy: %7.4f', i,
                             batch_loss, batch_accurate / len(target_seg))
                pbar.set_description('Testing, Accuracy={:.4}'.format(
                    batch_accurate / len(target_seg)))

        average_loss = total_loss / len(dl)
        average_accuracy = total_accurate / total_count
        calculated_pk, _ = acc.calc_accuracy()

        logger.info('Finished testing.')
        logger.info('Average loss: %s', average_loss)
        logger.info('Average accuracy: %s', average_accuracy)
        logger.info('Pk: {:.4}.'.format(calculated_pk))
        logger.info('F1: {:.4}.'.format(preds_stats.get_f1()))

        end = timer()
        print('Seconds to execute to whole flow: ' + str(end - start))
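
Note: the softmax(...) applied to the raw model outputs above is not defined in this listing; it is presumably a row-wise numpy softmax along these lines (a sketch, not necessarily the project's exact helper):

import numpy as np


def softmax(x):
    # Row-wise softmax with max-subtraction for numerical stability.
    x = x - np.max(x, axis=1, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=1, keepdims=True)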
Example No. 30
from argparse import ArgumentParser
from utils import config, read_config_file

parser = ArgumentParser()
parser.add_argument('--cuda', help='Is cuda?', action='store_true')
parser.add_argument('--model', help='Model file path', required=True)
parser.add_argument('--config',
                    help='Path to config.json',
                    default='config.json')
parser.add_argument('--test', help='Use fake word2vec', action='store_true')
parser.add_argument('--port', type=int, help='Listen on this port')

args = parser.parse_args()

read_config_file(args.config)
config.update(args.__dict__)

from webapp import app
app.run(debug=True, port=args.port)
Example No. 31
File: app.py Project: iluddy/flock
from flask import Flask, Request
from utils import setup_logger, read_config_file
import argparse  # needed for the argument parser below
import os
import rollbar
import rollbar.contrib.flask
from flask import got_request_exception
from celery import Celery
import __builtin__

# Argument parser
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument("-c")
args = arg_parser.parse_args()

# Config
cfg = read_config_file(args.c)

# Create App
app = Flask(__name__, static_url_path='')
app.secret_key = SECRET_KEY
app.permanent_session_lifetime = SESSION_DURATION
documentor = Autodoc(app)
app.config['MONGODB_SETTINGS'] = {
    'db': cfg["database"]["name"],
    'host': cfg["database"]["host"],
    'port': cfg["database"]["port"]
}

# Celery
app.config['CELERY_BROKER_URL'] = cfg['redis']['url']
app.config['CELERY_RESULT_BACKEND'] = cfg['redis']['url']
Example No. 32
def main(args):
    utils.read_config_file(args.config)
    utils.config.update(args.__dict__)

    file_path = args.input
    segments_path = args.segment
    output_folder_path = args.output

    file = open(str(segments_path), "r")
    segments_content = file.read()
    file.close()

    file = open(str(file_path), "r")
    raw_content = file.read()
    file.close()

    sentences = [
        s for s in raw_content.decode('utf-8').strip().split("\n")
        if len(s) > 0 and s != "\n"
    ]
    segments = [
        s for s in segments_content.decode('utf-8').strip().split("\n")
        if len(s) > 0 and s != "\n"
    ]

    result_file_path = None

    last_doc_id = 0
    last_topic = ""

    if (len(sentences) != len(segments)):
        print "len(sentences) != len(segments)"
        return

    for i in range(len(sentences)):

        sentence = sentences[i]
        segment = segments[i].encode('utf-8').split("\r")[0]

        first_comma_index = segment.index(',')
        second_comma_index = segment[first_comma_index + 1:].index(',')
        current_doc_id = segment[0:first_comma_index]
        current_topic = segment[first_comma_index + second_comma_index + 2:]

        if (current_doc_id != last_doc_id):
            last_doc_id = current_doc_id
            print 'new file index'
            print last_doc_id
            if (result_file_path != None):
                result_file.close()
            result_file_path = os.path.join(output_folder_path,
                                            str(current_doc_id) + ".text")
            result_file = open(str(result_file_path), "w")
            last_topic = ""

        if (current_topic != last_topic):
            last_topic = current_topic
            level = 1 if (current_topic == "TOP-LEVEL SEGMENT") else 2
            result_file.write(
                (wiki_utils.get_segment_seperator(level, current_topic) +
                 ".").encode('utf-8'))
            result_file.write("\n".encode('utf-8'))

        actual_sentence = sentence
        result_file.write(actual_sentence.encode('utf-8'))
        if ('\n' in sentence):
            print 'back slash in sentence'
        #result_file.write(".".encode('utf-8'))
        result_file.write("\n".encode('utf-8'))