def main():
    logging.basicConfig(format='%(levelname)s:%(asctime)s %(message)s', level=settings.LOGLEVEL)
    es = Elasticsearch(settings.ELASTICSEARCH_CONNECT_STRING)
    logging.info('Market Refresh Rate: ' + str(settings.MARKET_REFRESH_RATE) + ' seconds.')
    logging.info('Initial Sleep: ' + str(5) + ' seconds.')
    logging.info('Application Started.')
    # supported_exchanges = [BitFinex_Market(), BitMex_Market(), BitTrex_Market(), GDAX_Market(), Gemini_Market(), Kraken_Market(), OKCoin_Market(), Poloniex_Market()]
    exchange_ids = ['bitstamp', 'gdax', 'kraken', 'gemini']
    exchanges = [ExchangeHarness(x) for x in exchange_ids]

    # Print active exchanges and create indexes in Kibana based on the products listed in each market.
    for exchange in exchanges:
        logging.info(exchange.exchange_id + ': activated and indexed.')
        for product, kibana_index in exchange.products.items():
            utils.create_index(es, kibana_index)

    logging.warning('Initiating Market Tracking.')

    # Record ticks.
    while True:
        with ThreadPoolExecutor(max_workers=5) as executor:
            try:
                sleep(settings.MARKET_REFRESH_RATE)
                executor.map(lambda ex: ex.record_ticker(es), exchanges)
                logging.info('Added another ticker record.')
            except Exception as e:
                logging.warning(e)
                sleep(settings.RETRY_RATE)


def run_reranking(new_index, sentence_in, qid, specific_ws, ref_doc, out_index, texts,
                  new_trectext_name, ranked_lists, new_feature_file, feature_dir,
                  trec_file, score_file, options):
    new_text = update_text_doc(texts[ref_doc], sentence_in, out_index)
    create_new_trectext(ref_doc, texts, new_text, new_trectext_name)
    create_specifi_ws(qid, ranked_lists, specific_ws)
    logger.info("creating features")
    create_index(new_trectext_name, os.path.dirname(new_index), os.path.basename(new_index),
                 options.home_path, options.indri_path)
    features_file = create_features_file_diff(feature_dir, options.index_path, new_index,
                                              new_feature_file, specific_ws, options.scripts_path,
                                              options.java_path, options.swig_path,
                                              options.stopwords_file, options.queries_text_file,
                                              options.home_path)
    logger.info("creating docname index")
    docname_index = create_index_to_doc_name_dict(features_file)
    logger.info("docname index creation is completed")
    logger.info("features creation completed")
    logger.info("running ranking model on features file")
    score_file = run_model(features_file, options.home_path, options.java_path,
                           options.jar_path, score_file, options.model)
    logger.info("ranking completed")
    logger.info("retrieving scores")
    scores = retrieve_scores(docname_index, score_file)
    logger.info("scores retrieval completed")
    logger.info("creating trec_eval file")
    tmp_trec = create_trec_eval_file(scores, trec_file)
    logger.info("trec file creation is completed")
    logger.info("ordering trec file")
    final = order_trec_file(tmp_trec)
    logger.info("ranking procedure completed")
    return final


def main():
    logging.basicConfig(format='%(levelname)s:%(asctime)s %(message)s', level=settings.LOGLEVEL)
    es = Elasticsearch(settings.ELASTICSEARCH_CONNECT_STRING)
    logging.info('Market Refresh Rate: ' + str(settings.MARKET_REFRESH_RATE) + ' seconds.')
    logging.info('Initial Sleep: ' + str(5) + ' seconds.')
    logging.info('Application Started.')
    RESTful_exchanges = ['bittrex', 'kraken', 'poloniex', 'kucoin', 'cryptopia']
    exchanges = [ExchangeHarness(x) for x in RESTful_exchanges]

    # Print active exchanges and create indexes in Kibana based on the products listed in each market.
    for exchange in exchanges:
        logging.info(exchange.exchange.id + ': activated and indexed.')
        for product, kibana_index in exchange.products.items():
            utils.create_index(es, kibana_index['ticker'])
            utils.create_index(es, kibana_index['orderbook'])

    logging.warning('Initiating Market Tracking.')

    # Record ticks.
    while True:
        loop = asyncio.get_event_loop()
        try:
            for exchange in exchanges:
                asyncio.ensure_future(exchange.record_data(es))
            loop.run_forever()
        except Exception as e:
            logging.warning(e)
            loop.close()


def main():
    logging.basicConfig(format='%(levelname)s:%(asctime)s %(message)s', level=settings.LOGLEVEL)
    es = Elasticsearch(settings.ELASTICSEARCH_CONNECT_STRING)
    logging.info('Market Refresh Rate: ' + str(settings.MARKET_REFRESH_RATE) + ' seconds.')
    logging.info('Initial Sleep: ' + str(settings.INITIAL_SLEEP) + ' seconds.')
    sleep(settings.INITIAL_SLEEP)
    logging.info('Application Started.')
    exchanges = [ex() for ex in support_exchange]

    # Print active exchanges and create indexes in Kibana based on the products listed in each market.
    for exchange in exchanges:
        logging.info(exchange.exchange + ': activated and indexed.')
        for product, kibana_index in exchange.products.items():
            utils.create_index(es, kibana_index)

    logging.warning('Initiating Market Tracking.')

    # Record ticks.
    while True:
        sleep(settings.MARKET_REFRESH_RATE)
        try:
            for exchange in exchanges:
                exchange.record_ticker(es)
        except Exception as e:
            logging.warning(e)
            sleep(settings.RETRY_RATE)


def main():
    logging.basicConfig(format='%(levelname)s:%(asctime)s %(message)s', level=settings.LOGLEVEL)
    es = Elasticsearch(settings.ELASTICSEARCH_CONNECT_STRING)
    logging.info('Market Refresh Rate: ' + str(settings.MARKET_REFRESH_RATE) + ' seconds.')
    logging.info('Initial Sleep: ' + str(settings.INITIAL_SLEEP) + ' seconds.')
    sleep(settings.INITIAL_SLEEP)
    logging.info('Application Started.')
    exchanges = [BitFinex_Market(), BitMex_Market(), BitTrex_Market(), GDAX_Market(),
                 Gemini_Market(), Kraken_Market(), OKCoin_Market(), Poloniex_Market()]

    # Print active exchanges and create indexes in Kibana based on the products listed in each market.
    for exchange in exchanges:
        logging.info(exchange.exchange + ': activated and indexed.')
        for product, kibana_index in exchange.products.items():
            utils.create_index(es, kibana_index)

    logging.warning('Initiating Market Tracking.')

    # Record ticks.
    while True:
        sleep(settings.MARKET_REFRESH_RATE)
        try:
            for exchange in exchanges:
                exchange.record_ticker(es)
        except Exception as e:
            logging.warning(e)
            sleep(settings.RETRY_RATE)


def method(bucket, key, file):
    try:
        index_name = 'index_name'
        index_mapping = 'index_mapping'
        create_index(index_name, index_mapping)
    except ClusterException as e:
        # Log and re-raise the original cluster error, preserving its traceback.
        print(e)
        raise
    except Exception as e:
        # Wrap any other failure in the project-level indexing error.
        raise IndexingException(e) from e


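# The wrapper above relies on two project-specific exception types that are not defined in the
# snippet. A minimal sketch of what they could look like: the class names come from the code
# above, but these definitions are assumptions, not the project's actual implementation.
class ClusterException(Exception):
    """Raised when the cluster refuses or fails an index operation."""


class IndexingException(Exception):
    """Raised when indexing fails for any reason other than a cluster error."""

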
def evaluate_episode(data, config, model, loss_fn, eval):
    x_te, y_te, te_len, te_mask, text_te = utils.load_test(data, eval)
    y_ind_te = utils.create_index(y_te)
    kl_loss = torch.nn.KLDivLoss(reduction='sum').to(config['device'])
    reverse_dict = data['reverse_dict']
    y_te_ind = utils.create_index(y_te)
    num_class = np.unique(y_te)
    num_test_query = config['num_query_per_class'] * config['num_class']
    x_support, y_support, x_len_support, support_m, sup_text = utils.load_support(data, False)
    y_support_ind = utils.create_index(y_support)
    total_prediction = np.array([], dtype=np.int64)
    total_y_test = np.array([], dtype=np.int64)
    cum_acc = []
    cum_loss = 0.0
    with torch.no_grad():
        for episode in range(config['num_episodes']):
            (support_feature, support_class, support_len, support_ind, support_mask, support_text,
             query_feature, query_class, query_len, query_ind, query_mask, query_text) = utils.create_query_support(
                x_support, y_support, x_len_support, y_support_ind, support_m, sup_text,
                x_te, y_te, te_len, y_te_ind, te_mask, text_te, config, config['num_test_class'])
            support_feature, support_id, support_ind, support_len, support_mask = convert_to_tensor(
                support_feature, support_class, support_ind, support_len, support_mask, config['device'])
            query_feature, query_id, query_ind, query_len, query_mask = convert_to_tensor(
                query_feature, query_class, query_ind, query_len, query_mask, config['device'])
            prediction, incons_loss, support_attn, query_attn, support_thres, query_thres = model.forward(
                support_feature, support_len, support_mask, query_feature, query_len, query_mask)
            pred = np.argmax(prediction.cpu().detach().numpy(), 1)
            cur_acc = accuracy_score(query_class, pred)
            cum_acc.append(cur_acc)
            val_loss = 0.0
            cum_loss += val_loss
    cum_loss = cum_loss / config['num_episodes']
    cum_acc = np.array(cum_acc)
    avg_acc, std_acc = np.mean(cum_acc), np.std(cum_acc)
    print("Average accuracy", avg_acc)
    print("STD", std_acc)
    return avg_acc, cum_loss


def train(data, config, current_directory):
    x_tr = data['x_tr']
    y_tr = data['y_tr']
    y_ind_tr = utils.create_index(y_tr)
    tr_mask = data['mask_tr']
    x_len_tr = data['len_tr']
    x_text = data['text_tr']
    x_support_tr, y_support_tr, x_len_support_tr, support_m_tr, support_text_tr = utils.load_support(data, True)
    y_support_ind_tr = utils.create_index(y_support_tr)
    embedding = data['embedding']
    model, optimizer, loss_fn = load_model(config, embedding)
    model.train()
    train_episode(x_support_tr, y_support_tr, x_len_support_tr, y_support_ind_tr, support_m_tr,
                  support_text_tr, x_tr, y_tr, x_len_tr, y_ind_tr, tr_mask, x_text,
                  config, model, loss_fn, optimizer, current_directory)
    return loss_fn


def prepare_data(infile: str = '',
                 test: str = '',
                 save: str = '',
                 entity: bool = False,
                 minimum: int = 1,
                 maximum: int = 40,
                 lang: str = 'english'
                 ) -> Tuple[List[Document], DocumentsRaw, g3.Analysis]:
    """
    - Extracts sentences from the input file
    - documents only holds sentences which will be used in the A matrix
    - sentences_raw[sent_id] = raw_sentence (origText)
    """
    assert infile or test, "Either need to have an input file or be in test mode"
    assert not (infile and test), "Can't input a file while in test mode. Remove the '-i' flag."

    documents: List[Document] = []
    sentences_raw: DocumentsRaw = {}

    if infile:
        client = AdvancedClient(
            apiHost='http://alpha.g',
            apiPath=AdvancedClient.apiPathForWorkflow('prototype-full-analysis-news'),
            batchSize=5,
            threadCount=4,
        )
        with open(infile) as fh:
            rq_builder = RequestBuilder.fullAnalysis(language=LANGUAGE[lang], domain='news')
            analysis = client.analyzeOne(rq_builder.build(text=fh.read()))
        if save:
            with open(save, 'w') as fout:
                print(json.dumps(g3.toDict(analysis), ensure_ascii=False), file=fout)
    elif test:
        with open(test, 'r') as fp:
            analysis = g3.reader.fromDict(json.load(fp))

    token_to_mention, sent_to_token_to_rel = utils.create_index(analysis)
    data_split = [sent for sent in analysis.paragraphs[0].sentences]
    for sentence in data_split:
        # Here a document is a sentence, and `sent` is a tuple: sent = (sent_id, wordlist)
        sent = utils.read_doc(sentence, token_to_mention, sent_to_token_to_rel,
                              entity=entity, minimum=minimum, maximum=maximum, lang=lang)
        if sent:
            documents.append(sent)
        sentences_raw[sentence._id] = sentence.origText
    return documents, sentences_raw, analysis


import sys
import random

# DyNet's ParameterCollection is used below; assuming the conventional import alias.
import dynet as dy

from utils import FORM, XPOS, DEPREL
from utils import create_dictionary, create_index, read_conllu, map_to_instances, shuffled_stream
from utils import parse_projective
from layers import Embeddings, BiLSTM


class MLP(object):
    pass


if __name__ == "__main__":
    random.seed(1)
    train_file = "../treebanks/train/en/en.conllu"
    index = create_index(create_dictionary(read_conllu(train_file)))
    train_data = list(map_to_instances(read_conllu(train_file), index, (FORM, XPOS)))
    max_epochs = 30
    lstm_dim = 250
    arc_hidden_dim = 100
    label_hidden_dim = 100
    pc = dy.ParameterCollection()
    # embeddings = Embeddings(pc, [(len(index[FORM]) + 1, 100), (len(index[XPOS]) + 1, 25)])
    # input_dim = embeddings.dim
    input_dim = 125
    num_labels = len(index[DEPREL])


import argparse


def _parse_args():
    # NOTE: the original snippet begins mid-function; the header and parser construction
    # are reconstructed from the trailing "return args" and the call in __main__ below.
    parser = argparse.ArgumentParser()
    parser.add_argument("--inputfile", required=True)
    parser.add_argument("--outbasename", required=True)
    parser.add_argument("--fields", default=["FORM", "UPOS", "FEATS", "DEPREL"], nargs='+')
    parser.add_argument("--size", default=[100, 25, 25, 0], type=int, nargs='+')
    parser.add_argument("--min_frequency", default=5, type=int)
    parser.add_argument("--window", default=5, type=int)
    parser.add_argument("--sg")
    parser.add_argument("--seed", default=1, type=int)
    args = parser.parse_args()
    args.fields = [STR_TO_FIELD[f.lower()] for f in args.fields]
    return args


if __name__ == "__main__":
    from gensim.models import Word2Vec

    args = _parse_args()
    print("building index...", end=" ")
    dic = create_dictionary(read_conllu(args.inputfile), fields=args.fields)
    index = create_index(dic, min_frequency=args.min_frequency)
    print("done")
    write_index(args.outbasename, index, args.fields)
    _word2vec(index, args)


def connection():
    test_connection = get_connection()
    create_index(test_connection)
    yield test_connection
    delete_index(test_connection)


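# A minimal usage sketch, assuming the setup/teardown generator above is meant to serve as a
# pytest fixture. The fixture registration and the test below are illustrative additions,
# not part of the original snippet.
import pytest

connection = pytest.fixture(connection)  # register the generator as a fixture


def test_index_is_available(connection):
    # pytest injects the prepared connection; delete_index() runs automatically afterwards.
    assert connection is not None

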
def evaluate_nonepisode(data, config, model, loss_fn, eval):
    x_te, y_te, te_len, te_mask, text_te = utils.load_test(data, eval)
    x_te, y_te, te_len, te_mask, text_te = utils.shuffle_data(x_te, y_te, te_len, te_mask, text_te)
    y_te_ind = utils.create_index(y_te)
    reverse_dict = data['reverse_dict']
    num_class = np.unique(y_te)
    num_test_query = config['num_query_per_class'] * num_class.shape[0]
    x_support, y_support, x_len_support, support_m, support_text = utils.load_support(data, False)
    y_support_ind = utils.create_index(y_support)
    test_batch = int(math.ceil(x_te.shape[0] / float(num_test_query)))
    total_prediction = np.array([], dtype=np.int64)
    total_y_test = np.array([], dtype=np.int64)
    with torch.no_grad():
        for batch in range(test_batch):
            support_feature, support_class, support_len, support_ind, support_mask = utils.init_support_query(
                config['num_samples_per_class'], x_te.shape[1], num_class.shape[0])
            query_feature, query_class, query_len, query_ind, query_mask = utils.init_support_query(
                config['num_query_per_class'], x_te.shape[1], num_class.shape[0])
            begin_index = batch * num_test_query
            end_index = min((batch + 1) * num_test_query, x_te.shape[0])
            query_feature = x_te[begin_index:end_index]
            query_len = te_len[begin_index:end_index]
            query_class = y_te[begin_index:end_index]
            query_mask = te_mask[begin_index:end_index]
            query_text = text_te[begin_index:end_index]
            support_idx = 0
            num_class = np.unique(y_support)
            for counter in range(num_class.shape[0]):
                class_index = np.where(y_support == num_class[counter])[0]
                old_support_idx = support_idx
                support_idx = support_idx + config['num_samples_per_class']
                support_feature[old_support_idx:support_idx] = x_support[class_index]
                support_class[old_support_idx:support_idx] = y_support[class_index]
                support_len[old_support_idx:support_idx] = x_len_support[class_index]
                support_mask[old_support_idx:support_idx] = support_m[class_index]
                support_text[old_support_idx:support_idx] = support_text[class_index]
            cs = np.unique(query_class)
            # Obtain indexes
            q_ind_key = {}
            s_ind_key = {}
            for i in range(len(cs)):
                q_index = np.where(query_class == cs[i])[0]
                s_index = np.where(support_class == cs[i])[0]
                q_ind_key[cs[i]] = q_index
                s_ind_key[cs[i]] = s_index
            # Reset class index
            for i in range(len(cs)):
                query_class[q_ind_key[cs[i]]] = i
                support_class[s_ind_key[cs[i]]] = i
            support_ind = utils.create_index(support_class)
            query_ind = utils.create_index(query_class)
            support_feature, support_id, support_ind, support_len, support_mask = convert_to_tensor(
                support_feature, support_class, support_ind, support_len, support_mask, config['device'])
            query_feature, query_id, query_ind, query_len, query_mask = convert_to_tensor(
                query_feature, query_class, query_ind, query_len, query_mask, config['device'])
            prediction, _, support_attn, query_attn = model.forward(
                support_feature, support_len, support_mask, query_feature, query_len, query_mask)
            pred = np.argmax(prediction.cpu().detach().numpy(), 1)
            total_prediction = np.concatenate((total_prediction, pred))
            total_y_test = np.concatenate((total_y_test, query_class))
    acc = accuracy_score(total_y_test, total_prediction)
    cnf = confusion_matrix(total_y_test, total_prediction)
    print("Confusion matrix:")
    print(cnf)
    return acc


def setup_es(self):
    self.es = Elasticsearch(self.es_address)
    utils.create_index(self.es, '{}.tickers'.format(self.index_prefix))
    utils.create_index(self.es, '{}.orderbooks.l2'.format(self.index_prefix))
    utils.create_index(self.es, '{}.orderbooks.l1'.format(self.index_prefix))
    utils.create_index(self.es, '{}.orderbooks.l0'.format(self.index_prefix))
    utils.create_index(self.es, '{}.volumes'.format(self.index_prefix))
    utils.create_index(self.es, '{}.instrument'.format(self.index_prefix))
    utils.create_index(self.es, '{}.funds'.format(self.index_prefix))