def oneD(self, nxdata, scan):
    '''*internal*: generic data parser for 1-D column data, returns signal and axis'''
    for column in scan.L:
        self.write_ds(nxdata, column, scan.data[column])
    signal = utils.clean_name(scan.column_last)   # primary Y axis
    axis = utils.clean_name(scan.column_first)    # primary X axis
    self.mca_spectra(nxdata, scan, axis)          # records any MCA data
    return signal, axis
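# The `utils.clean_name` helper used throughout this section is not shown here.
# A minimal sketch of what it plausibly does in the spec2nexus context -- turning
# an arbitrary SPEC column label into a valid HDF5/NeXus dataset name -- follows;
# the exact rules are an assumption for illustration, not the library's code.
import re

def clean_name_sketch(key):
    """Return a name safe for HDF5: word characters only, and not
    starting with a digit (hypothetical re-implementation)."""
    base = re.sub(r'\W', '_', key)      # replace non-word characters with '_'
    if base and base[0].isdigit():
        base = '_' + base               # dataset names must not start with a digit
    return base

# e.g. clean_name_sketch('2-theta') -> '_2_theta'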
def download(self, path=None):
    if self.available is False:
        # print('Cant download %s' % c_ount(self.name))
        return
    url = self.media_url
    if url is None:
        return
    if path is None:
        path = SAVE_PATH
    folder = clean_name(self.name)
    try:
        # Make sure the show folder exists
        os.makedirs(os.path.join(path, folder))
    except OSError:
        if not os.path.isdir(os.path.join(path, folder)):
            raise
    fp = os.path.join(path, folder, self.file_name)
    q = 'high'  # fix me
    t = (url, q, fp)
    Downloader(self).add((url, q, fp))
    return t
def process_midwest_html(fname, product_dict):
    '''Process the midwest html file'''
    with open(fname, 'r') as f:
        soup = BeautifulSoup(f, 'html.parser')
    product_li = soup.find_all('li', {'class': 'account-listItem'})
    for product in product_li:
        titles = product.find('h5', {"class": "account-product-title"})
        try:
            # add any variant descriptors if there are any
            variants = product.find_all('dd', {'class': 'definitionList-value'})
            variants = " ".join(v.string for v in variants)
            if not variants:    # join() never returns None; test for the empty string
                name = titles.string
            else:
                name = titles.string + variants
            # the name starts with the quantity, e.g. "2 x Product Name"
            quantity = int(name[0])
            name = name[4:]
            # clean name
            name = clean_name(name, replace_dict_noreg)
            # add to dict
            product_dict[name] = quantity
        except AttributeError:
            pass
    return product_dict
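# `clean_name(name, replace_dict_noreg)` takes a replacement table, unlike the
# single-argument variants elsewhere in this section. A plausible sketch,
# assuming `replace_dict_noreg` maps literal substrings to replacements (the
# table's contents and semantics are assumptions for illustration):
def clean_name_with_table(name, replace_dict):
    """Apply plain (non-regex) substring replacements, then trim whitespace."""
    for old, new in replace_dict.items():
        name = name.replace(old, new)
    return name.strip()

# e.g. clean_name_with_table('Widget&trade; 12oz', {'&trade;': ''}) -> 'Widget 12oz'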
def retrieve_plot_data(self):
    '''retrieve default data from spec data file: parser for 2-D mesh and hklmesh'''
    label1, start1, end1, intervals1, label2, start2, end2, intervals2, time = \
        self.scan.scanCmd.split()[1:]
    if label1 not in self.scan.data:
        label1 = self.scan.L[0]  # mnemonic v. name
    if label2 not in self.scan.data:
        label2 = self.scan.L[1]  # mnemonic v. name
    axis1 = self.scan.data.get(label1)
    axis2 = self.scan.data.get(label2)
    intervals1, intervals2 = map(int, (intervals1, intervals2))
    start1, end1, start2, end2, time = map(float, (start1, end1, start2, end2, time))

    if len(axis1) < intervals1 and min(axis2) == max(axis2):
        # stopped scan before second row started, 1-D plot is better (issue #82)
        self.axes = [label1, ]
        self.signal = self.scan.column_last
        self.data[label1] = self.scan.data[label1]
        self.data[self.signal] = self.scan.data[self.signal]
        return

    axis1 = axis1[0:intervals1 + 1]
    self.data[label1] = axis1   # 1-D array
    axis2 = [axis2[row] for row in range(len(axis2)) if row % (intervals1 + 1) == 0]
    self.data[label2] = axis2   # 1-D array

    column_labels = self.scan.L
    column_labels.remove(label1)  # special handling
    column_labels.remove(label2)  # special handling
    if self.scan.scanCmd.startswith('hkl'):
        # find the reciprocal space axis held constant
        label3 = [key for key in ('H', 'K', 'L') if key in column_labels][0]
        self.data[label3] = self.scan.data.get(label3)[0]   # constant

    # build 2-D data objects (do not build label1, label2, [or label3] as 2-D objects)
    data_shape = [len(axis2), len(axis1)]
    for label in column_labels:
        if label not in self.data:
            axis = numpy.array(self.scan.data.get(label))
            self.data[label] = utils.reshape_data(axis, data_shape)

    self.signal = utils.clean_name(self.scan.column_last)
    self.axes = [label1, label2]

    if spec.MCA_DATA_KEY in self.scan.data:  # 3-D array(s)
        # save each spectrum
        for key, spectrum in sorted(self.scan.data[spec.MCA_DATA_KEY].items()):
            num_channels = len(spectrum[0])
            data_shape.append(num_channels)
            mca = numpy.array(spectrum)
            data = utils.reshape_data(mca, data_shape)
            channels = range(1, num_channels + 1)
            ds_name = '_' + key + '_'
            self.data[ds_name] = data
            self.data[ds_name + 'channel_'] = channels
def evaluate_model(self, questions_fpath=os.path.join('res', 'model', 'questions-words.txt')):
    if clean_name(self.fname).endswith('pos'):
        pos_file(questions_fpath)
        questions_fpath = questions_fpath + '.pos'
    return self.model.accuracy(questions_fpath)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-mn", "--model_name", help="model name",
                        default='spanishEtiquetado.bin')
    parser.add_argument("-qn", "--questions_name", help="questions name",
                        default='ambiguous_verbs.sp')
    parser.add_argument("-w", "--window", help="model window size", type=int, default=5)
    parser.add_argument("-n", "--n_proc", help="number of processes", type=int, default=4)
    args = parser.parse_args()
    word2vec.logger.setLevel(logging.DEBUG)
    w2v = W2V(args.model_name, n_proc=args.n_proc, window=args.window)
    pos_name = clean_name(args.model_name) + '.pos' + '.bin'
    w2v_pos = W2V(pos_name, n_proc=args.n_proc, window=args.window)
    # print(len(word2vec_exp.model.vocab))
    # print(word2vec_exp.model.vocab.items()[:10])
    # print(word2vec_exp.model.similarity('add_VB', 'remove_VB'))
    # print(len(model.vocab.keys()))
    questions_fpath = os.path.join('res', 'mult', args.questions_name)
    print(datetime.datetime.now())
    eval1 = w2v.evaluate_model(questions_fpath)
    print(datetime.datetime.now())
    eval2 = w2v_pos.evaluate_model(questions_fpath)
    print(datetime.datetime.now())
    missing1, missing2 = compare_section(eval1, eval2, to_section_name(args.questions_name))
def clean_results(self, results, imdb=False):
    subtitles = {}
    user_ranks = {
        'administrator': 1,
        'platinum member': 2,
        'vip member': 3,
        'gold member': 4,
        'trusted': 5,
        'silver member': 6,
        'bronze member': 7,
        'sub leecher': 8,
        '': 9,
    }
    for result in results:
        if result['SubBad'] != '1':
            movie_hash = result.get('MovieHash')
            if not movie_hash:
                movie_hash = self.imdbid_to_hash[int(result['IDMovieImdb'])]
            subid = result['IDSubtitleFile']
            downcount = int(result['SubDownloadsCnt'])
            rating = float(result['SubRating'])
            if rating and rating < 8:
                # Ignore poorly rated subtitles, while not
                # penalizing the ones that haven't yet been rated
                continue
            user_rank = user_ranks[result['UserRank']]
            if imdb:
                cleaned_release_name = utils.clean_name(result['MovieReleaseName'])
                file_name = self.moviefiles[movie_hash]['file_name']
                cleaned_file_name = utils.clean_name(file_name)
                overlap = len(set.intersection(set(cleaned_release_name),
                                               set(cleaned_file_name)))
            else:
                overlap = 0
            subtitles.setdefault(movie_hash, []).append({
                'subid': subid,
                'downcount': downcount,
                'rating': rating,
                'user_rank': user_rank,
                'overlap': overlap,
            })
    return subtitles
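# How the cleaned results might be consumed: pick the best candidate per movie
# hash by uploader rank, then popularity. A hedged usage sketch -- the sort keys
# are an assumption about intent, not this project's actual selection code:
def best_subtitle(candidates):
    """Lower user_rank is better; break ties by download count, then name overlap."""
    return min(candidates,
               key=lambda s: (s['user_rank'], -s['downcount'], -s['overlap']))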
def compute_position(event, context):
    # Load price history from the s3 bucket
    bucket_name = environ.get("BUCKET")
    usdt_file_name = environ.get("USDT_PRICE_FILE_NAME")
    client = boto3.client("s3")
    file = client.get_object(Bucket=bucket_name, Key=usdt_file_name)
    price_df = pd.read_csv(file["Body"], compression='gzip')

    order_history = json.loads(event['body'])['data']
    order_df = pd.DataFrame(order_history)
    order_df = order_df.apply(pd.to_numeric, errors='ignore')
    order_df = order_df[order_df['executedQty'] > 0]
    order_df = order_df[['symbol', 'executedQty', 'side', 'updateTime', 'price']]
    order_df['updateTime'] = order_df['updateTime'].apply(convert_to_datetime)
    order_df['symbol'] = order_df['symbol'].apply(clean_name)
    order_df.reset_index(inplace=True, drop=True)
    order_df['symbol'] = order_df['symbol'] + "USDT_close"
    order_df['side'] = np.where(order_df['side'] == "BUY", 1, -1)
    order_df['executedQty'] = order_df['executedQty'] * order_df['side']

    price_df = price_df[list(order_df['symbol'].unique()) + ['timestamp']]
    price_df['timestamp'] = pd.to_datetime(price_df['timestamp'])
    price_df = price_df[price_df['timestamp'] >=
                        order_df['updateTime'].min() - timedelta(hours=4)]  # [Potential Bug]
    price_df.set_index('timestamp', inplace=True)

    pos_df = pd.DataFrame(columns=price_df.columns)
    for t in price_df.index:
        temp = order_df[order_df['updateTime'] < t]
        temp = temp.groupby('symbol').sum()['executedQty']
        pos_df = pos_df.append(temp)
    pos_df.index = price_df.index

    nav_df = price_df.multiply(pos_df, axis=0)
    # the second positional argument of pd.to_datetime is `errors`,
    # so the format must be passed by keyword
    nav_df.index = pd.to_datetime(nav_df.index, format="%Y-%m-%d")
    nav_df.columns = [clean_name(n) for n in list(nav_df.columns)]
    nav_df = nav_df.fillna(0)

    nav_timeseries_data = []
    for t in nav_df.index:
        unix_secs = mktime(t.timetuple())
        for col in nav_df.columns:
            nav_timeseries_data.append([unix_secs, col, nav_df.loc[t, col]])

    response = {
        "statusCode": "200",
        "headers": {
            'Access-Control-Allow-Origin': "*",
            'Access-Control-Allow-Credentials': True,
        },
        "body": json.dumps({"data": nav_timeseries_data}),
    }
    return response
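# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0. A sketch
# of the position loop above rewritten with `pd.concat`, collecting the
# per-timestamp Series first (behavior assumed equivalent for this use):
import pandas as pd

def build_positions(price_df, order_df):
    rows = []
    for t in price_df.index:
        filled = order_df[order_df['updateTime'] < t]
        rows.append(filled.groupby('symbol').sum()['executedQty'])
    pos_df = pd.concat(rows, axis=1).T          # one row per timestamp
    pos_df.index = price_df.index
    return pos_df.reindex(columns=price_df.columns)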
def gen_one_test_feature():
    # process test data and save in pickle
    # testdatafeatures --> {pid-with-index: {candidate-aids: [...], data: [[xxx], [xxx], ...]}}
    valid_nuass = load_json(VALID_UNASS_PATH)
    valid_pub = load_json(VALID_PUB_PATH)
    # whole_author_profile_pub = load_json(WHOLE_AUTHOR_PROFILE_PUB_PATH)
    aid2yearinfo = load_pickle(os.path.join(NEW_DATA_V2_DIR, 'aid2yearinfo.pkl'))
    aid2coauthor = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2coauthor.pkl'))
    aid2venue = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2venue.pkl'))
    aid2keywords = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2keywords.pkl'))
    aid2year = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2year.pkl'))
    aid2orgwithyear = load_pickle(os.path.join(NEW_DATA_V2_DIR, 'aid2orgwithyear.pkl'))
    name2aids = load_pickle(os.path.join(NEW_DATA_DIR, 'name2aids.pkl'))
    # aid2pids = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2pids.pkl'))
    testdatafeatures = {}
    all_authors_name = list(name2aids.keys())
    all_aids = []
    for key in name2aids:
        aids = name2aids[key]
        all_aids.extend(aids.tolist())
    all_aids = np.array(all_aids)
    for pid_with_index in tqdm.tqdm(valid_nuass):
        inner_dict = {}
        now_pid, index = pid_with_index.split('-')
        author_name = valid_pub[now_pid]['authors'][int(index)]['name']
        author_name = clean_name(author_name)
        index = get_name_index(author_name, all_authors_name)
        author_name = all_authors_name[index]
        candidate_aids = name2aids[author_name]
        candidate_aids = all_aids   # override: consider every author id as candidate
        inner_dict['candidate-aids'] = candidate_aids
        data = []
        for aid in candidate_aids:
            print(aid)
            new_pair = (aid, pid_with_index)
            pid_info_dict = valid_pub[now_pid]
            aid_author_info_dict = aid2coauthor[aid]
            aid_year_info_dict = aid2year[aid]
            aid_venue_dict = aid2venue[aid]
            aid_org_year_list = aid2orgwithyear[aid]
            aid_keywords_dict = aid2keywords[aid]
            aid_year_all_info_dict = aid2yearinfo[aid]
            data.append(get_features(new_pair, pid_info_dict, aid_author_info_dict,
                                     aid_year_info_dict, aid_venue_dict,
                                     aid_org_year_list, aid_keywords_dict,
                                     aid_year_all_info_dict))
        data = np.array(data)
        inner_dict['data'] = data
        testdatafeatures[pid_with_index] = inner_dict
        break   # "one" test feature: stop after the first paper
    save_pickle(testdatafeatures, './testdatafeatures_one.pkl')
def gen_test_feature():
    # process test data and save in pickle
    # testdatafeatures --> {pid-with-index: {candidate-aids: [...], data: [[xxx], [xxx], ...]}}
    valid_nuass = load_json(VALID_UNASS_PATH)
    valid_pub = load_json(VALID_PUB_PATH)
    # whole_author_profile_pub = load_json(WHOLE_AUTHOR_PROFILE_PUB_PATH)
    aid2yearinfo = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2yearinfo.pkl'))
    aid2coauthor = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2coauthor.pkl'))
    aid2venue = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2venue.pkl'))
    aid2keywords = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2keywords.pkl'))
    aid2year = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2year.pkl'))
    aid2orgwithyear = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2orgwithyear.pkl'))
    name2aids = load_pickle(os.path.join(NEW_DATA_DIR, 'name2aids.pkl'))
    aid2pids = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2pids.pkl'))
    aid2orgset = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2orgset.pkl'))
    aid2venueset = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2venueset.pkl'))
    aid2keywordsset = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2keywordsset.pkl'))
    all_pids_len = 0
    for aid in aid2pids:
        all_pids_len += len(aid2pids[aid])
    testdatafeatures = {}
    all_authors_name = list(name2aids.keys())
    # author_name_count = defaultdict(int)
    for pid_with_index in tqdm.tqdm(valid_nuass):
        inner_dict = {}
        now_pid, index = pid_with_index.split('-')
        author_name = valid_pub[now_pid]['authors'][int(index)]['name']
        author_name = clean_name(author_name)
        index = get_name_index(author_name, all_authors_name)
        author_name = all_authors_name[index]
        # author_name_count[author_name] += 1
        # continue
        candidate_aids = name2aids[author_name]
        inner_dict['candidate-aids'] = candidate_aids
        data = []
        for aid in candidate_aids:
            new_pair = (aid, pid_with_index)
            pid_info_dict = valid_pub[now_pid]
            aid_author_info_dict = aid2coauthor[aid]
            aid_year_info_dict = aid2year[aid]
            aid_venue_dict = aid2venue[aid]
            aid_org_year_list = aid2orgwithyear[aid]
            aid_keywords_dict = aid2keywords[aid]
            aid_year_all_info_dict = aid2yearinfo[aid]
            org_info_set = aid2orgset[aid]
            aid_venue_set = aid2venueset[aid]
            aid_keywords_set = aid2keywordsset[aid]
            data.append(get_features(new_pair, pid_info_dict, aid_author_info_dict,
                                     aid_year_info_dict, aid_venue_dict,
                                     aid_org_year_list, aid_keywords_dict,
                                     aid_year_all_info_dict, org_info_set,
                                     aid_venue_set, aid_keywords_set))
            # per-candidate prior: this aid's share of all profiled papers
            data[-1].append(len(aid2pids[aid]) / all_pids_len)
        data = np.array(data)
        inner_dict['data'] = data
        testdatafeatures[pid_with_index] = inner_dict
    save_pickle(testdatafeatures,
                os.path.join(TEST_FEATURE_DIR_V2, 'testdatafeatures-withsetinfo-papercount.pkl'))
def get_coauthor_count_for_enhence(aid_pid_pair, aid_author_info_dict, pid_info_dict):
    index = int(aid_pid_pair[1].split('-')[1])
    authors = pid_info_dict['authors']
    authors = [clean_name(item['name']) for item in authors]
    authors.pop(index)   # drop the target author, keep only the coauthors
    count = 0
    for author_name in authors:
        if author_name in aid_author_info_dict:
            count += 1
    return count
def create_model(self, fname, max_news=99, n_proc=1, window=5, splits=100):
    name = clean_name(fname)
    model = word2vec.Word2Vec(window=window, workers=n_proc)
    if name == 'text8':
        sentences = word2vec.Text8Corpus(os.path.join('res', 'model', 'text8'))
        model.build_vocab(sentences)   # train() fails without a vocabulary
        model.train(sentences)
    elif name == 'brown':
        # sentences = word2vec.BrownCorpus(fpath)
        sentences = brown.sents()
        model.build_vocab(sentences)
        model.train(sentences)
    elif name.startswith('news'):
        target_fpath = os.path.join('res', 'model', name + '.txt')
        if not os.path.exists(target_fpath):
            build_news_corpus(name, max_news, n_proc, target_fpath)
        sentences = word2vec.LineSentence(target_fpath)
        model.build_vocab(sentences)
        model.train(sentences)
    # elif name.startswith('wikipedia.deps'):
    #     target_fpath = os.path.join('res', 'model', name + '.txt')
    #     if not os.path.exists(target_fpath):
    #         build_wikipedia_corpus(name, max_news, n_proc, target_fpath)
    elif name.startswith('spanishEtiquetado'):
        target_fpath = os.path.join('res', 'model', name + '.txt')
        if not os.path.exists(target_fpath):
            path = os.path.join('res', 'model', 'spanishEtiquetado')
            max_pos_len = re.search(r'\d+', name)   # e.g. 'spanishEtiquetado100pos' -> 100
            if max_pos_len:
                max_pos_len = int(max_pos_len.group(0))
            build_corpus(path, name.endswith('pos'), target_fpath, max_pos_len)
        sentences = word2vec.LineSentence(target_fpath)
        # with open(target_fpath) as fp:
        #     sentences = fp.readlines()
        model.build_vocab(sentences)
        model.train(sentences)
    else:
        target_fpath = os.path.join('res', 'model', name + '.txt')
        file_to_lower(target_fpath)
        sentences = word2vec.LineSentence(target_fpath)
        model.build_vocab(sentences)
        model.train(sentences)
    # n_sents = len(sentences)
    # if splits == 0:
    #     splits = 1
    # split_size = int(n_sents / splits)
    # for i in range(splits):
    #     split_sentences = sentences[i * split_size:(i + 1) * split_size - 1]
    # model.save(os.path.join('res', name + '.model'))
    model.save_word2vec_format(os.path.join('res', 'model', fname),
                               binary=fname.endswith('.bin'))
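# Compatibility note: the bare `model.train(sentences)` calls above only work on
# old gensim (0.x). On modern gensim the call needs explicit sizing; a hedged
# sketch of the equivalent modern idiom:
#     model.build_vocab(sentences)
#     model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)
# `model.save_word2vec_format` likewise moved to `model.wv.save_word2vec_format`
# in gensim >= 1.0.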
def __init__(self, data, *args, **kwargs):
    self.data = data
    self.name = data.get('name', '') or data.get('title', '')
    self.name = self.name.strip()
    self.title = data.get('title', '')
    self.type = data.get('type')
    self.id = data.get('id')
    self.available = data.get('isAvailable', False)
    self._image_url = "http://m.nrk.no/m/img?kaleidoId=%s&width=%d"
    if self.data.get('episodeNumberOrDate'):
        self.full_title = '%s %s' % (self.name,
                                     self._fix_sn(self.data.get('seasonId'),
                                                  season_ids=kwargs.get('seasonIds')))
    else:
        self.full_title = self.title
    self.file_name = self._filename(self.full_title)
    self.file_path = os.path.join(SAVE_PATH, clean_name(self.name), self.file_name)
    self._image_id = data.get('imageId') or kwargs.get('imageId')
def gen_test_title_abstract_vec(mission='title'):
    if mission == 'title':
        aid2cate = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2titlevec.pkl'))
    elif mission == 'abstract':
        aid2cate = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2abstractvec.pkl'))
    else:
        raise ValueError('mission value error')
    valid_nuass = load_json(VALID_UNASS_PATH)
    valid_pub = load_json(VALID_PUB_PATH)
    name2aids = load_pickle(os.path.join(NEW_DATA_DIR, 'name2aids.pkl'))
    texttovec = TextToVec()
    all_authors_name = list(name2aids.keys())
    # test_cate_feature --> {pid-with-index: {candidate-aids: [...], data: [(emb0, emb1), ...]}}
    test_cate_feature = {}
    for pid_with_index in tqdm.tqdm(valid_nuass):
        inner_dict = {}
        now_pid, index = pid_with_index.split('-')
        author_name = valid_pub[now_pid]['authors'][int(index)]['name']
        author_name = clean_name(author_name)
        index = get_name_index(author_name, all_authors_name)
        author_name = all_authors_name[index]
        candidate_aids = name2aids[author_name]
        inner_dict['candidate-aids'] = candidate_aids
        data = []
        for aid in candidate_aids:
            info = valid_pub[now_pid].get(mission)
            if info is None:
                emb = np.zeros(300)
            else:
                emb = texttovec.get_vec(info)
            emb_pair = (aid2cate[aid], emb)
            data.append(emb_pair)
        inner_dict['data'] = data
        test_cate_feature[pid_with_index] = inner_dict
    save_pickle(test_cate_feature,
                os.path.join(TEST_FEATURE_DIR_V2, 'test-%s-emb-pair.pkl' % mission))
def check_name():
    problem_pids = load_json(os.path.join(FINAL_DIR, 'problem.pids.3.json'))
    name2aids = load_pickle(os.path.join(NEW_DATA_DIR, 'name2aids.pkl'))
    test_pub = load_json(TEST_PUB_PATH)
    all_authors_name = list(name2aids.keys())
    name_map = []
    for pid_with_index in tqdm.tqdm(problem_pids):
        now_pid, index = pid_with_index.split('-')
        author_name_no_clean = test_pub[now_pid]['authors'][int(index)]['name']
        author_name = clean_name(author_name_no_clean)
        # manual overrides for two known-problematic papers
        if pid_with_index == 'ToCcabLT-1':
            author_name = 'junliang_wang'
        if pid_with_index == 'cVvvcFzj-1':
            author_name = 'xiaojun_liu'
        index = get_name_index(author_name, all_authors_name)
        author_name_inlist = all_authors_name[index]
        # if author_name_inlist != author_name:
        name_map.append((pid_with_index, author_name_no_clean,
                         author_name, author_name_inlist))
    name_map = list(set(name_map))
    print(len(name_map))
    save_json(name_map, os.path.join(FINAL_DIR, 'name.different.3.json'))
        continue
    ms_song_id = dir_name.split("/")[-1]
    if ms_song_id not in songs_info:
        print("========== SONG", ms_song_id, "NOT IN SONGS INFO DATABASE ============")
        continue
    # A song directory might contain more than one version, select one
    selected_version_path = get_version_with_highest_unr(dir_name, files)
    # selected_version_md5 = os.path.splitext(selected_version_path)[0].split("/")[-1]
    with open(selected_version_path, "rb") as midi_file:
        selected_version_md5 = hashlib.md5(midi_file.read()).hexdigest()
    # Check for duplicates
    if selected_version_md5 not in songs:
        songs[selected_version_md5] = selected_version_path
        # Get song and artist names
        h5 = ms.hdf5_getters.open_h5_file_read(songs_info[ms_song_id])
        song_name = clean_name(ms.hdf5_getters.get_title(h5))
        artist_name = clean_name(ms.hdf5_getters.get_artist_name(h5))
        h5.close()
        print("Adding song", song_name, "by", artist_name)
        add_song(selected_version_path, song_name, artist_name, opt.out, genre_mapping)
def _filename(self, name=None):
    # fall back to full_title when no name is given; the parentheses are
    # required, since '%s' % name or default binds as ('%s' % name) or default
    name = clean_name('%s' % (name or self.full_title))
    name = name.replace(' ', '.') + '.WEBDL-nrkdl'
    return name
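# Why the parentheses matter: `'%s' % None` is the truthy string 'None', so the
# unparenthesized form never falls through to the default. A minimal demonstration:
assert ('%s' % None or 'fallback') == 'None'        # bug: fallback unreachable
assert ('%s' % (None or 'fallback')) == 'fallback'  # fixed precedence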
def generate_depicted_people(self):
    depicted = " / ".join(
        [utils.clean_name(x["name"]) for x in self.depicted])
    return depicted
def gen_base_feature(index, multi_size):
    # process test data and save in pickle
    # testdatafeatures --> {pid-with-index: {candidate-aids: [...], data: [[xxx], [xxx], ...]}}
    test_unass = load_json(TEST_UNASS_PATH)
    test_pub = load_json(TEST_PUB_PATH)
    # whole_author_profile_pub = load_json(WHOLE_AUTHOR_PROFILE_PUB_PATH)
    aid2yearinfo = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2yearinfo.pkl'))
    aid2coauthor = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2coauthor.pkl'))
    aid2venue = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2venue.pkl'))
    aid2keywords = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2keywords.pkl'))
    aid2year = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2year.pkl'))
    aid2orgwithyear = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2orgwithyear.pkl'))
    name2aids = load_pickle(os.path.join(NEW_DATA_DIR, 'name2aids.pkl'))
    # aid2pids = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2pids.pkl'))
    aid2orgset = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2orgset.pkl'))
    aid2venueset = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2venueset.pkl'))
    aid2keywordsset = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2keywordsset.pkl'))
    name_map = load_json(os.path.join(FINAL_DIR, 'name.different.modified.json'))
    original_name = [pair[0] for pair in name_map]
    changed_name = [pair[1] for pair in name_map]
    name_map2 = load_json(os.path.join(FINAL_DIR, 'name.different.2.modified.json'))
    original_name2 = [pair[0] for pair in name_map2]
    changed_name2 = [pair[1] for pair in name_map2]
    # slice this worker's share of the unassigned papers
    single_range = math.ceil(len(test_unass) / multi_size)
    start = index * single_range
    end = (index + 1) * single_range \
        if (index + 1) * single_range < len(test_unass) else len(test_unass)
    testdatafeatures = {}
    all_authors_name = list(name2aids.keys())
    print('Gen test features ...')
    for pid_with_index in tqdm.tqdm(test_unass[start:end]):
        inner_dict = {}
        now_pid, index = pid_with_index.split('-')
        author_name = test_pub[now_pid]['authors'][int(index)]['name']
        author_name = clean_name(author_name)
        if pid_with_index == 'ToCcabLT-1':
            author_name = 'junliang_wang'
        if pid_with_index == 'cVvvcFzj-1':
            author_name = 'xiaojun_liu'
        if author_name in original_name2:
            name_index = original_name2.index(author_name)
            author_name = changed_name2[name_index]
        elif author_name in original_name:
            name_index = original_name.index(author_name)
            author_name = changed_name[name_index]
        else:
            index = get_name_index(author_name, all_authors_name)
            author_name = all_authors_name[index]
        if isinstance(author_name, str):
            candidate_aids = name2aids[author_name]
        elif isinstance(author_name, list):
            candidate_aids = []
            for name in author_name:
                candidate_aids.extend(name2aids[name].tolist())
            candidate_aids = np.array(candidate_aids)
        else:
            raise ValueError("check author name !!!")
        inner_dict['candidate-aids'] = candidate_aids
        data = []
        for aid in candidate_aids:
            new_pair = (aid, pid_with_index)
            pid_info_dict = test_pub[now_pid]
            aid_author_info_dict = aid2coauthor[aid]
            aid_year_info_dict = aid2year[aid]
            aid_venue_dict = aid2venue[aid]
            aid_org_year_list = aid2orgwithyear[aid]
            aid_keywords_dict = aid2keywords[aid]
            aid_year_all_info_dict = aid2yearinfo[aid]
            org_info_set = aid2orgset[aid]
            aid_venue_set = aid2venueset[aid]
            aid_keywords_set = aid2keywordsset[aid]
            data.append(get_features(new_pair, pid_info_dict, aid_author_info_dict,
                                     aid_year_info_dict, aid_venue_dict,
                                     aid_org_year_list, aid_keywords_dict,
                                     aid_year_all_info_dict, org_info_set,
                                     aid_venue_set, aid_keywords_set))
        data = np.array(data)
        inner_dict['data'] = data
        testdatafeatures[pid_with_index] = inner_dict
    # save_pickle(testdatafeatures, os.path.join(TEST_FEATURE_DIR, 'u6uRzaff-5.pkl'))
    return testdatafeatures
def write_ds(self, group, label, data, **attr):
    '''*internal*: writes a dataset to the HDF5 file, records the SPEC name as an attribute'''
    clean_name = utils.clean_name(label)
    eznx.write_dataset(group, clean_name, data, spec_name=label, **attr)
def mesh(self, nxdata, scan):
    '''*internal*: data parser for 2-D mesh and hklmesh'''
    # 2-D parser: http://www.certif.com/spec_help/mesh.html
    #   mesh motor1 start1 end1 intervals1 motor2 start2 end2 intervals2 time
    # 2-D parser: http://www.certif.com/spec_help/hklmesh.html
    #   hklmesh Q1 start1 end1 intervals1 Q2 start2 end2 intervals2 time
    # mesh:    data/33id_spec.dat  scan 22
    # hklmesh: data/33bm_spec.dat  scan 17
    signal, axes = '', ['', ]
    label1, start1, end1, intervals1, label2, start2, end2, intervals2, time = \
        scan.scanCmd.split()[1:]
    if label1 not in scan.data:
        label1 = scan.L[0]  # mnemonic v. name
    if label2 not in scan.data:
        label2 = scan.L[1]  # mnemonic v. name
    axis1 = scan.data.get(label1)
    axis2 = scan.data.get(label2)
    intervals1, intervals2 = map(int, (intervals1, intervals2))
    start1, end1, start2, end2, time = map(float, (start1, end1, start2, end2, time))

    if len(axis1) < intervals1:
        # stopped scan before second row started
        signal, axes = self.oneD(nxdata, scan)  # fallback support
    else:
        axis1 = axis1[0:intervals1 + 1]
        axis2 = [axis2[row] for row in range(len(axis2)) if row % (intervals1 + 1) == 0]

        column_labels = scan.L
        column_labels.remove(label1)  # special handling
        column_labels.remove(label2)  # special handling
        if scan.scanCmd.startswith('hkl'):
            # find the reciprocal space axis held constant
            label3 = [key for key in ('H', 'K', 'L') if key not in (label1, label2)][0]
            axis3 = scan.data.get(label3)[0]
            self.write_ds(nxdata, label3, axis3)

        self.write_ds(nxdata, label1, axis1)  # 1-D array
        self.write_ds(nxdata, label2, axis2)  # 1-D array

        # build 2-D data objects (do not build label1, label2, [or label3] as 2-D objects)
        data_shape = [len(axis1), len(axis2)]
        for label in column_labels:
            if label not in nxdata:
                axis = np.array(scan.data.get(label))
                self.write_ds(nxdata, label, utils.reshape_data(axis, data_shape))

        signal = utils.clean_name(scan.column_last)
        axes = ':'.join([label1, label2])

        if '_mca_' in scan.data:  # 3-D array(s)
            # save each spectrum
            for key, spectrum in sorted(scan.data['_mca_'].items()):
                num_channels = len(spectrum[0])
                data_shape.append(num_channels)
                mca = np.array(spectrum)
                data = utils.reshape_data(mca, data_shape)
                channels = range(1, num_channels + 1)
                ds_name = '_' + key + '_'
                self.write_ds(nxdata, ds_name, data,
                              axes=axes + ':' + ds_name + 'channel_', units='counts')
                self.write_ds(nxdata, ds_name + 'channel_', channels, units='channel')

    return signal, axes
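# `utils.reshape_data` turns a possibly-incomplete scan column into the full
# mesh shape. A minimal sketch, assuming NaN-padding semantics for scans that
# stopped early (the real spec2nexus helper may differ):
import numpy as np

def reshape_data_sketch(scan_data, scan_shape):
    """Return scan_data as an array of scan_shape, NaN-padding short scans."""
    flat = np.asarray(scan_data, dtype=float).ravel()
    size = int(np.prod(scan_shape))
    if flat.size < size:
        padded = np.full(size, np.nan)
        padded[:flat.size] = flat
        flat = padded
    return flat[:size].reshape(scan_shape)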
def main():
    with Halo("Setting up script details.", spinner="dots") as spinner:
        league_id = os.environ.get("SLEEPER_LEAGUE_ID", None)
        user_id = os.environ.get("SLEEPER_USER_ID", None)
        args = parser.parse_args()
        command_args = dict(vars(args))
        is_dry_run = command_args.pop("dry_run", None)
        keep_positions = tuple(command_args.pop("positions", None))
        spinner.succeed()
    Halo(f"Included positions are {', '.join(keep_positions)}", spinner="dots").succeed()

    league = League(league_id)
    players = Players()
    league_rosters = league.get_rosters()
    if is_dry_run:
        all_players = players.get_all_players()
        with open("./data/sleeper_players_current.json", "w") as outfile:
            json.dump(all_players, outfile)
    else:
        with open("./data/sleeper_players_current.json", "r") as infile:
            all_players = json.load(infile)
    own_team = [team for team in league_rosters if team["owner_id"] == user_id].pop()
    own_players = own_team["players"]
    keep_players = {
        p_id: p_data
        for p_id, p_data in all_players.items()
        if p_data["position"] in keep_positions
    }
    # save keep_players for testing
    with open("./data/sleeper_players_keep.json", "w") as outfile:
        json.dump(keep_players, outfile)

    # ID free agents by comparing keep_players to rosters
    rostered_player_ids = [
        player for team in league_rosters for player in team["players"]
    ]
    with Halo("Separating players into rostered and FAs.", spinner="dots") as spinner:
        free_agents = {
            p_id: p_data
            for p_id, p_data in keep_players.items()
            if p_id not in rostered_player_ids
        }
        rostered_players = {
            p_id: p_data
            for p_id, p_data in keep_players.items()
            if p_id in rostered_player_ids
        }
        spinner.succeed()

    with Halo("Pulling Numberfire Projections", spinner="dots") as spinner:
        nfp = numberfireProjections("half_ppr")
        nfp.get_data("flex")
        nfp.convert_projections()
        spinner.succeed()
    nf_cleaned_names = {clean_name(x): x for x in nfp.projections.keys()}

    # add projections in to rosters
    for p_id, p_data in free_agents.items():
        if p_data["search_full_name"] in nf_cleaned_names.keys():
            p_data["numberfire_projections"] = nfp.projections[
                nf_cleaned_names[p_data["search_full_name"]]]
        else:
            p_data["numberfire_projections"] = 0
    for p_id, p_data in rostered_players.items():
        if p_data["search_full_name"] in nf_cleaned_names.keys():
            p_data["numberfire_projections"] = nfp.projections[
                nf_cleaned_names[p_data["search_full_name"]]]
        else:
            p_data["numberfire_projections"] = 0
    Halo("Added projections to FAs and rostered players.", spinner="dots").succeed()

    # comparison
    own_roster = {
        p_id: p_data
        for p_id, p_data in rostered_players.items()
        if p_id in own_players
    }
    waiver_players = dict()
    for p_id, p_data in own_roster.items():
        if p_data["status"] == "Injured Reserve":
            continue
        waiver_dict = {
            "drop_proj": p_data["numberfire_projections"],
            "players_to_add": list(),
        }
        for fa_id, fa_data in free_agents.items():
            if (fa_data["numberfire_projections"] > p_data["numberfire_projections"]) \
                    and (fa_data["position"] == p_data["position"]):
                fa_dict = {
                    "waiver_player": fa_data["search_full_name"],
                    "waiver_proj": fa_data["numberfire_projections"],
                }
                waiver_dict["players_to_add"].append(fa_dict)
        waiver_players[p_data["search_full_name"]] = waiver_dict
    Halo("Compared FA projections to your roster. Returning players with better projections.",
         spinner="dots").succeed()
    pp = pprint.PrettyPrinter()
    pp.pprint(waiver_players)
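# The roster/projection matching above hinges on `clean_name` normalizing
# Numberfire player names into Sleeper's `search_full_name` form. A plausible
# sketch (lowercase, strip punctuation and generational suffixes); the exact
# normalization rules are an assumption for illustration:
import re

def clean_player_name(name):
    """'Odell Beckham Jr.' -> 'odellbeckham' (hypothetical normalization)."""
    name = re.sub(r'\b(jr|sr|ii|iii|iv|v)\.?$', '', name.strip().lower())
    return re.sub(r'[^a-z]', '', name)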
def mesh(self, nxdata, scan):
    '''*internal*: data parser for 2-D mesh and hklmesh'''
    # TODO: refactor to use NeXus data model: signal, axes, data
    # 2-D parser: http://www.certif.com/spec_help/mesh.html
    #   mesh motor1 start1 end1 intervals1 motor2 start2 end2 intervals2 time
    # 2-D parser: http://www.certif.com/spec_help/hklmesh.html
    #   hklmesh Q1 start1 end1 intervals1 Q2 start2 end2 intervals2 time
    # mesh:    data/33id_spec.dat  scan 22
    # hklmesh: data/33bm_spec.dat  scan 17
    signal, axes = '', ['', ]
    label1, start1, end1, intervals1, label2, start2, end2, intervals2, time = \
        scan.scanCmd.split()[1:]
    if label1 not in scan.data:
        label1 = scan.L[0]  # mnemonic v. name
    if label2 not in scan.data:
        label2 = scan.L[1]  # mnemonic v. name
    axis1 = scan.data.get(label1)
    axis2 = scan.data.get(label2)
    intervals1, intervals2 = map(int, (intervals1, intervals2))
    start1, end1, start2, end2, time = map(float, (start1, end1, start2, end2, time))

    if len(axis1) < intervals1:
        # stopped scan before second row started
        signal, axes = self.oneD(nxdata, scan)  # fallback support
    else:
        axis1 = axis1[0:intervals1 + 1]
        axis2 = [axis2[row] for row in range(len(axis2)) if row % (intervals1 + 1) == 0]

        column_labels = scan.L
        column_labels.remove(label1)  # special handling
        column_labels.remove(label2)  # special handling
        if scan.scanCmd.startswith('hkl'):
            # find the reciprocal space axis held constant
            label3 = [key for key in ('H', 'K', 'L') if key not in (label1, label2)][0]
            axis3 = scan.data.get(label3)[0]
            self.write_ds(nxdata, label3, axis3)

        self.write_ds(nxdata, label1, axis1)  # 1-D array
        self.write_ds(nxdata, label2, axis2)  # 1-D array

        # build 2-D data objects (do not build label1, label2, [or label3] as 2-D objects)
        data_shape = [len(axis1), len(axis2)]
        for label in column_labels:
            if label not in nxdata:
                axis = np.array(scan.data.get(label))
                self.write_ds(nxdata, label, converters.reshape_data(axis, data_shape))

        signal = utils.clean_name(scan.column_last)
        axes = ':'.join([label1, label2])

        if spec.MCA_DATA_KEY in scan.data:  # 3-D array(s)
            # save each spectrum
            for key, spectrum in sorted(scan.data[spec.MCA_DATA_KEY].items()):
                num_channels = len(spectrum[0])
                data_shape.append(num_channels)
                mca = np.array(spectrum)
                data = converters.reshape_data(mca, data_shape)
                channels = range(1, num_channels + 1)
                ds_name = '_' + key + '_'
                self.write_ds(nxdata, ds_name, data,
                              axes=axes + ':' + ds_name + 'channel_', units='counts')
                self.write_ds(nxdata, ds_name + 'channel_', channels, units='channel')

    return signal, axes
def preprocessing(mission='train'):
    # ------------------------------------------
    # process whole_author_profile.json, add index, and save to pickle
    # save format: name2aids --> {name: [aids, ...]}, aid2pids --> {aid: [pid-index, ...]}
    os.makedirs(NEW_DATA_DIR, exist_ok=True)
    os.makedirs(NEW_DATA_V2_DIR, exist_ok=True)
    if mission == 'train':
        whole_author_profile = load_json(
            os.path.join(SPLIT_DIR, 'train_profile-last1year.json'))
    elif mission == 'test':
        whole_author_profile = load_json(WHOLE_AUTHOR_PROFILE_PATH)
    else:
        raise ValueError("check mission value")
    whole_author_profile_pub = load_json(WHOLE_AUTHOR_PROFILE_PUB_PATH)
    name2aids = {}
    aid2pids = {}
    aids = []
    names = []
    pids_with_index = []
    for aid in tqdm.tqdm(whole_author_profile):
        aids.append(aid)
        names.append(whole_author_profile[aid]['name'])
        pids = whole_author_profile[aid]['papers']
        tmp = []
        for paper in pids:
            paper_authors = whole_author_profile_pub[paper]['authors']
            author_names = [clean_name(item['name']) for item in paper_authors]
            # print(author_names)
            index = get_name_index(names[-1], author_names)
            tmp.append('%s-%d' % (paper, index))
        pids_with_index.append(tmp)
    assert len(aids) == len(names)
    assert len(names) == len(pids_with_index)
    print('all aids num: ', len(aids))
    name_set = set(names)
    names_array = np.array(names)
    aids_array = np.array(aids)
    for name in name_set:
        target_aid = aids_array[names_array == name]
        name2aids[name] = target_aid
    for aid, pid in zip(aids, pids_with_index):
        aid2pids[aid] = pid
    if mission == 'train':
        save_pickle(name2aids, os.path.join(NEW_DATA_V2_DIR, 'name2aids.pkl'))
        save_pickle(aid2pids, os.path.join(NEW_DATA_V2_DIR, 'aid2pids.pkl'))
    elif mission == 'test':
        save_pickle(name2aids, os.path.join(NEW_DATA_DIR, 'name2aids.pkl'))
        save_pickle(aid2pids, os.path.join(NEW_DATA_DIR, 'aid2pids.pkl'))

    # ------------------------------------------
    # save format: aid2year --> {aid: {min: xxx, max: xxx, mean: xxx, median: xxx,
    #                                  min_max_avg: xxx, year_list: [year, ...]}}
    if mission == 'train':
        aid2pids = load_pickle(os.path.join(NEW_DATA_V2_DIR, 'aid2pids.pkl'))
    elif mission == 'test':
        aid2pids = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2pids.pkl'))
    aid2year = {}
    print('Process year info ...')
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        all_years = []
        for pid_with_index in pids:
            pid = pid_with_index.split('-')[0]
            year = whole_author_profile_pub[pid].get('year', '0')
            year = 0 if year == '' else int(year)
            if any([year < 1500, year > 2100]):
                year = 0
            all_years.append(year)
        all_years = np.array(all_years)
        all_years = all_years[all_years != 0]
        if len(all_years) == 0:
            year_info = None
        else:
            year_info = {
                'min': np.min(all_years),
                'max': np.max(all_years),
                'mean': np.mean(all_years),
                'min_max_avg': (np.min(all_years) + np.max(all_years)) / 2,
                'median': np.median(all_years),
                'year_list': all_years,
            }
        aid2year[aid] = year_info
    if mission == 'train':
        save_pickle(aid2year, os.path.join(NEW_DATA_V2_DIR, 'aid2year.pkl'))
    elif mission == 'test':
        save_pickle(aid2year, os.path.join(NEW_DATA_DIR, 'aid2year.pkl'))

    # ------------------------------------------
    # save format: aid2coauthor --> {aid: {author-name: count, ...}}
    aid2coauthor = {}
    print('aid2coauthor processing ...')
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        inner_dict = defaultdict(int)
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            authors = whole_author_profile_pub[pid]['authors']
            authors_name = [clean_name(item['name']) for item in authors]
            authors_name.pop(int(index))
            for name in authors_name:
                inner_dict[name] += 1
        aid2coauthor[aid] = inner_dict
    if mission == 'train':
        save_pickle(aid2coauthor, os.path.join(NEW_DATA_V2_DIR, 'aid2coauthor.pkl'))
    elif mission == 'test':
        save_pickle(aid2coauthor, os.path.join(NEW_DATA_DIR, 'aid2coauthor.pkl'))

    # ------------------------------------------
    # save format: aid2venue --> {aid: {venue-name: count, ...}}
    aid2venue = {}
    print('aid2venue processing ...')
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        inner_dict = defaultdict(int)
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            venue = whole_author_profile_pub[pid]['venue'].lower()
            if venue != '':
                inner_dict[venue] += 1
        aid2venue[aid] = inner_dict
    if mission == 'train':
        save_pickle(aid2venue, os.path.join(NEW_DATA_V2_DIR, 'aid2venue.pkl'))
    elif mission == 'test':
        save_pickle(aid2venue, os.path.join(NEW_DATA_DIR, 'aid2venue.pkl'))

    # ------------------------------------------
    # save format: aid2keywords --> {aid: {keyword: count, ...}}
    aid2keywords = {}
    print('aid2keywords processing ...')
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        inner_dict = defaultdict(int)
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            keywords = whole_author_profile_pub[pid].get('keywords', '')
            if len(keywords) == 0:
                continue
            for keyword in keywords:
                if keyword != '':
                    inner_dict[keyword] += 1
        aid2keywords[aid] = inner_dict
    if mission == 'train':
        save_pickle(aid2keywords, os.path.join(NEW_DATA_V2_DIR, 'aid2keywords.pkl'))
    elif mission == 'test':
        save_pickle(aid2keywords, os.path.join(NEW_DATA_DIR, 'aid2keywords.pkl'))

    # ------------------------------------------
    # save format: aid2orgset --> {aid: set{org_word, org_word, ...}}
    aid2orgset = {}
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        inner_set = set()
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            author = whole_author_profile_pub[pid].get('authors')[int(index)]
            org = author.get('org', '').lower().strip()
            inner_set = inner_set | set(org.split())
        aid2orgset[aid] = inner_set
    if mission == 'train':
        save_pickle(aid2orgset, os.path.join(NEW_DATA_V2_DIR, 'aid2orgset.pkl'))
    elif mission == 'test':
        save_pickle(aid2orgset, os.path.join(NEW_DATA_DIR, 'aid2orgset.pkl'))

    # ------------------------------------------
    # save format: aid2venueset --> {aid: set{venue_word, venue_word, ...}}
    aid2venueset = {}
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        inner_set = set()
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            venue = whole_author_profile_pub[pid].get('venue', '').lower()
            if venue == '':
                continue
            inner_set = inner_set | set(venue.replace('-', ' ').split())
        aid2venueset[aid] = inner_set
    if mission == 'train':
        save_pickle(aid2venueset, os.path.join(NEW_DATA_V2_DIR, 'aid2venueset.pkl'))
    elif mission == 'test':
        save_pickle(aid2venueset, os.path.join(NEW_DATA_DIR, 'aid2venueset.pkl'))

    # ------------------------------------------
    # save format: aid2keywordsset --> {aid: set{key_word, key_word, ...}}
    aid2keywordsset = {}
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        inner_set = set()
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            keywords = whole_author_profile_pub[pid].get('keywords', '')
            if len(keywords) == 0:
                continue
            for keyword in keywords:
                if keyword != '':
                    keyword_set = set(keyword.lower().replace('-', ' ').split())
                    inner_set = inner_set | keyword_set
        aid2keywordsset[aid] = inner_set
    if mission == 'train':
        save_pickle(aid2keywordsset, os.path.join(NEW_DATA_V2_DIR, 'aid2keywordsset.pkl'))
    elif mission == 'test':
        save_pickle(aid2keywordsset, os.path.join(NEW_DATA_DIR, 'aid2keywordsset.pkl'))

    # ------------------------------------------
    # save format: aid2orgwithyear --> {aid: [(org, year), () ...]}
    aid2orgwithyear = {}
    for aid in tqdm.tqdm(aid2pids.keys()):
        pids = aid2pids[aid]
        inner_list = []
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            authors = whole_author_profile_pub[pid]['authors']
            org = authors[int(index)].get('org', '').lower()
            year = whole_author_profile_pub[pid].get('year', '0')
            year = 0 if year == '' else int(year)
            if any([year < 1500, year > 2100]):
                year = 0
            inner_list.append((org, year))
        aid2orgwithyear[aid] = inner_list
    if mission == 'train':
        save_pickle(aid2orgwithyear, os.path.join(NEW_DATA_V2_DIR, 'aid2orgwithyear.pkl'))
    elif mission == 'test':
        save_pickle(aid2orgwithyear, os.path.join(NEW_DATA_DIR, 'aid2orgwithyear.pkl'))

    # ------------------------------------------
    # save format: aid2yearinfo --> {aid: {year: {
    #     orgs: [org, ...],
    #     venues: [venue, ...],
    #     keywords: [keyword, ...],
    #     coauthors: [author-name, ...],
    # }}}
    aid2yearinfo = {}
    for aid in tqdm.tqdm(aid2pids.keys()):
        inner_dict = {}
        pids = aid2pids[aid]
        for pid_with_index in pids:
            pid, index = pid_with_index.split('-')
            year = whole_author_profile_pub[pid].get('year', '0')
            year = 0 if year == '' else int(year)
            if any([year < 1500, year > 2100]):
                year = 0
            authors = whole_author_profile_pub[pid]['authors']
            authors_name = [clean_name(item['name']) for item in authors]
            org = [authors[int(index)].get('org', '').lower()]
            authors_name.pop(int(index))
            coauthor = authors_name
            venue = [whole_author_profile_pub[pid].get('venue', '').lower()]
            keywords = whole_author_profile_pub[pid].get('keywords', [''])
            if len(keywords) == 0:
                keywords = ['']
            keywords = [keyword.lower() for keyword in keywords]
            tmp_dict = {
                'orgs': org,
                'venues': venue,
                'keywords': keywords,
                'coauthors': coauthor,
            }
            if year in inner_dict.keys():
                for key in tmp_dict:
                    inner_dict[year][key].extend(tmp_dict[key])
            else:
                inner_dict[year] = tmp_dict
        aid2yearinfo[aid] = inner_dict
    if mission == 'train':
        save_pickle(aid2yearinfo, os.path.join(NEW_DATA_V2_DIR, 'aid2yearinfo.pkl'))
    elif mission == 'test':
        save_pickle(aid2yearinfo, os.path.join(NEW_DATA_DIR, 'aid2yearinfo.pkl'))

    texttovec = TextToVec()
    # ------------------------------------------
    # save format: aid2titlevec --> {aid: [mean value]}
    aid2titlevec = {}
    for aid in tqdm.tqdm(aid2pids.keys()):
        papers = aid2pids[aid]
        inner_list = []
        for pid_with_index in papers:
            pid, index = pid_with_index.split('-')
            title = whole_author_profile_pub[pid]['title']
            inner_list.append(texttovec.get_vec(title))
        if len(inner_list) == 0:
            aid2titlevec[aid] = np.zeros(300)
        else:
            aid2titlevec[aid] = np.mean(np.array(inner_list), axis=0)
    if mission == 'train':
        save_pickle(aid2titlevec, os.path.join(NEW_DATA_V2_DIR, 'aid2titlevec.pkl'))
    elif mission == 'test':
        save_pickle(aid2titlevec, os.path.join(NEW_DATA_DIR, 'aid2titlevec.pkl'))

    # ------------------------------------------
    # save format: aid2abstractvec --> {aid: [mean value]}
    aid2abstractvec = {}
    for aid in tqdm.tqdm(aid2pids.keys()):
        papers = aid2pids[aid]
        inner_list = []
        for pid_with_index in papers:
            pid, index = pid_with_index.split('-')
            abstract = whole_author_profile_pub[pid].get('abstract')
            if abstract is None:
                continue
            inner_list.append(texttovec.get_vec(abstract))
        if len(inner_list) == 0:
            aid2abstractvec[aid] = np.zeros(300)
        else:
            aid2abstractvec[aid] = np.mean(np.array(inner_list), axis=0)
    if mission == 'train':
        save_pickle(aid2abstractvec, os.path.join(NEW_DATA_V2_DIR, 'aid2abstractvec.pkl'))
    elif mission == 'test':
        save_pickle(aid2abstractvec, os.path.join(NEW_DATA_DIR, 'aid2abstractvec.pkl'))
def get_pid_with_index(whole_author_profile_pub, pid, name):
    authors = whole_author_profile_pub[pid]['authors']
    authors_names = [clean_name(item['name']) for item in authors]
    index = get_name_index(name, authors_names)
    return '%s-%d' % (pid, index)
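# `get_name_index` locates a cleaned name inside a list of cleaned names; from
# its use above it must tolerate token reordering ('wang_jun' vs 'jun_wang').
# A hedged sketch of one plausible implementation, not the project's own:
def get_name_index_sketch(name, name_list):
    """Return the index of the best match for `name` in `name_list`,
    treating a name as an unordered bag of '_'-separated tokens."""
    target = set(name.split('_'))
    best_i, best_overlap = 0, -1
    for i, cand in enumerate(name_list):
        overlap = len(target & set(cand.split('_')))
        if overlap > best_overlap:
            best_i, best_overlap = i, overlap
    return best_i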
def get_all_trades(event, context):
    STABLE_COINS = ['USDT', 'USDC', 'PAX', 'BUSD', 'TUSD']
    # Retrieve traded pairs from the event payload
    json_payload = json.loads(event['body'])['data']
    binance_api_key = json_payload['API_KEY']
    binance_api_secret = json_payload['API_SECRET']
    traded_pairs = json_payload['TRADED_PAIRS']
    binance_client = Client(api_key=binance_api_key, api_secret=binance_api_secret)

    # Get latest prices of each coin from the s3 bucket
    bucket_name = environ.get("BUCKET")
    coin_file_names = environ.get("COIN_FILE_NAMES")
    client = boto3.client("s3")
    file = client.get_object(Bucket=bucket_name, Key=coin_file_names)
    coins_df = pd.read_csv(file["Body"], compression='gzip')
    prices = coins_df.set_index('symbol').to_dict()['price']

    # Step 1: Get quantities of the coins
    order_history = []
    reverse_order = []  # Suppose you trade DOTBNB: create a reverse order for BNB to "sell"
    qty_dict = {'BTC': 0, 'ETH': 0}
    for idx, pair in enumerate(traded_pairs):
        if idx == 200:
            sleep(30)
        orders = binance_client.get_all_orders(symbol=pair)
        order_history.extend(orders)
        quantity = 0
        for order in orders:
            ex_qty = float(order['executedQty'])
            if order['side'] == 'BUY':
                quantity += ex_qty
            else:
                quantity -= ex_qty
            if ex_qty > 0:
                if pair.split('BTC')[-1] == "":
                    reverse_coin = 'BTC'
                elif pair.split('BNB')[-1] == "":
                    reverse_coin = 'BNB'
                elif pair.split('ETH')[-1] == "":
                    reverse_coin = 'ETH'
                else:
                    reverse_coin = False
                if reverse_coin:
                    order_copy = order.copy()
                    order_copy['time'] += 1
                    order_copy['updateTime'] += 1
                    order_copy['symbol'] = reverse_coin + "USDT"
                    order_copy['executedQty'] = (float(order_copy['price']) *
                                                 float(order_copy['executedQty']))
                    order_copy['side'] = "SELL"
                    # This is for computing coin profit and loss
                    order_copy['type'] = "REVERSE"
                    reverse_order.append(order_copy)
        if orders:
            clean_coin_name = clean_name(pair)
            if clean_coin_name in qty_dict:
                qty_dict[clean_coin_name] += quantity
            else:
                qty_dict[clean_coin_name] = quantity

    # Step 1 Part 2: Resolve the reverse orders - this is very ugly code, but it works for now
    order_history.extend(reverse_order)
    for order in reverse_order:
        ex_qty = float(order['executedQty'])
        pair = order['symbol']
        clean_coin_name = clean_name(pair)
        if order['side'] == 'BUY':
            qty_dict[clean_coin_name] += ex_qty
        else:
            qty_dict[clean_coin_name] -= ex_qty

    # Step 2: Get current prices of coins in possession
    price_dict = {}
    for clean_coin_name in list(qty_dict.keys()):
        price_dict[clean_coin_name] = prices[clean_coin_name + "USDT"]

    # Step 3: Construct assets dictionary
    assets = {}
    for k in qty_dict:
        assets[k] = [qty_dict[k], price_dict[k]]

    # Step 4: Get any stable coins position; sum the balances across all
    # stable coins and report them under a single USDT entry
    stable_coin_position = 0.0
    for stable_coin in STABLE_COINS:
        stable_coin_obj = binance_client.get_asset_balance(stable_coin)
        stable_coin_position += (float(stable_coin_obj['free']) +
                                 float(stable_coin_obj['locked']))
    assets["USDT"] = [stable_coin_position, 1]

    frontend_formatted_assets = []
    for coin in assets:
        temp = {
            'Coin': coin,
            'Quantity': assets[coin][0],
            'Price': assets[coin][1],
        }
        temp['Value'] = round(float(temp['Price']) * float(temp['Quantity']), 3)
        if temp['Value'] > 5.0:
            frontend_formatted_assets.append(temp)

    response = {
        "statusCode": "200",
        "headers": {
            'Access-Control-Allow-Origin': "*",
            'Access-Control-Allow-Credentials': True,
        },
        "body": json.dumps({
            "assets": frontend_formatted_assets,
            "order_history": order_history,
        })
    }
    return response
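# In this module `clean_name` evidently strips the quote currency from a trading
# pair ('DOTBNB' -> 'DOT', 'ETHUSDT' -> 'ETH'). A hedged sketch of that variant;
# the suffix list is an assumption, not the project's actual code:
def clean_pair_name(pair):
    """Drop a known quote-currency suffix from a Binance symbol."""
    for quote in ('USDT', 'BUSD', 'BTC', 'ETH', 'BNB'):
        if pair.endswith(quote) and pair != quote:
            return pair[:-len(quote)]
    return pair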
def gen_title_feature():
    aid2titlevec = load_pickle(os.path.join(NEW_DATA_DIR, 'aid2titlevec.pkl'))
    test_unass = load_json(TEST_UNASS_PATH)
    test_pub = load_json(TEST_PUB_PATH)
    name2aids = load_pickle(os.path.join(NEW_DATA_DIR, 'name2aids.pkl'))
    texttovec = TextToVec()
    name_map = load_json(os.path.join(FINAL_DIR, 'name.different.modified.json'))
    original_name = [pair[0] for pair in name_map]
    changed_name = [pair[1] for pair in name_map]
    name_map2 = load_json(os.path.join(FINAL_DIR, 'name.different.2.modified.json'))
    original_name2 = [pair[0] for pair in name_map2]
    changed_name2 = [pair[1] for pair in name_map2]
    all_authors_name = list(name2aids.keys())
    # test_title_feature --> {pid-with-index: {candidate-aids: [...], data: [(emb0, emb1), ...]}}
    test_title_feature = {}
    print('Gen title emb pair ...')
    for pid_with_index in tqdm.tqdm(test_unass):
        inner_dict = {}
        now_pid, index = pid_with_index.split('-')
        author_name = test_pub[now_pid]['authors'][int(index)]['name']
        author_name = clean_name(author_name)
        if pid_with_index == 'ToCcabLT-1':
            author_name = 'junliang_wang'
        if pid_with_index == 'cVvvcFzj-1':
            author_name = 'xiaojun_liu'
        if author_name in original_name2:
            name_index = original_name2.index(author_name)
            author_name = changed_name2[name_index]
        elif author_name in original_name:
            name_index = original_name.index(author_name)
            author_name = changed_name[name_index]
        else:
            index = get_name_index(author_name, all_authors_name)
            author_name = all_authors_name[index]
        if isinstance(author_name, str):
            candidate_aids = name2aids[author_name]
        elif isinstance(author_name, list):
            candidate_aids = []
            for name in author_name:
                candidate_aids.extend(name2aids[name].tolist())
            candidate_aids = np.array(candidate_aids)
        else:
            raise ValueError("check author name !!!")
        inner_dict['candidate-aids'] = candidate_aids
        info = test_pub[now_pid].get('title')
        if info is None:
            emb = np.zeros(300)
        else:
            emb = texttovec.get_vec(info)
        data = []
        for aid in candidate_aids:
            emb_pair = (aid2titlevec[aid], emb)
            data.append(emb_pair)
        inner_dict['data'] = data
        test_title_feature[pid_with_index] = inner_dict
    save_pickle(test_title_feature,
                os.path.join(TEST_FEATURE_DIR, 'test-title-emb-pair-name-clean-2.pkl'))

    print('Gen title distance ...')
    test_title_emb_pair = load_pickle(
        os.path.join(TEST_FEATURE_DIR, 'test-title-emb-pair-name-clean-2.pkl'))
    test_unass = load_json(TEST_UNASS_PATH)
    title_emb_pair = []
    for pid_with_index in tqdm.tqdm(test_unass):
        for pair in test_title_emb_pair[pid_with_index]['data']:
            title_emb_pair.append(pair)
    emb_pair_to_distance(
        'tm.title.1.checkpoint.pth', 'title', title_emb_pair,
        os.path.join(TEST_FEATURE_DIR, 'test-title-distance-df-name-clean-2.pkl'))