def __init__(self, output_path, prod_name, wisdom_update={}):
    """
    Initialize postprocessor with output parameters.

    :param output_path: path where postprocessing files are stored
    :param prod_name: name of manifest json file and prefix of all output files
    :param wisdom_update: an optional dictionary that maps variables to
                          modifications requested in their visualization wisdom
    """
    logging.info("Postprocessor: output_path=%s prod_name=%s" % (output_path, prod_name))
    dump(wisdom_update, "Postprocessor: wisdom_update")
    self.output_path = output_path
    self.product_name = prod_name
    self.manifest = {}
    self.wisdom_update = wisdom_update

    # in case the manifest exists, load the existing version
    mf_path = os.path.join(output_path, prod_name + '.json')
    if osp.exists(mf_path):
        with open(mf_path) as f:
            self.manifest = json.load(f)
        logging.info('postprocessor: Loaded manifest at %s' % mf_path)
        # dump(self.manifest, "postprocessor: manifest")
    else:
        logging.info('postprocessor: manifest at %s does not exist yet' % mf_path)
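# Usage sketch for the constructor above. The class name `Postprocessor`, the
# paths, and the wisdom dictionary contents are illustrative assumptions, not
# values taken from this project:
#
#   pp = Postprocessor('/tmp/postproc', 'fire_run', {'T2': {'scale': [280, 320]}})
#
# Constructing it again with the same arguments reloads /tmp/postproc/fire_run.json,
# so later runs append to the existing manifest instead of starting fresh.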
def on_receive_card(self, card):
    if card in self.cards:
        return
    self.cards.append(card)
    dump(self.filename("cards"), self.cards)
    self.ui.update_card(self.room_id, card)
def store_user_review(observe_t=12):
    # Collect the users and reviews each store received within the first
    # observe_t months after the store's start date.
    store_user = {}
    store_review = {}
    store = utils.load("dicts/store.p")
    with open("dataset/yelp_academic_dataset_review.json", "r") as f:
        for line in f:
            line = json.loads(line)
            business_id = line["business_id"]
            if business_id not in store:
                continue
            review_id = line["review_id"]
            user_id = line["user_id"]
            date = datetime.strptime(line["date"], "%Y-%m-%d")
            if int((date - store[business_id]["start_t"]).days / 30) <= observe_t:
                if business_id in store_review:
                    store_review[business_id].append(review_id)
                else:
                    store_review[business_id] = [review_id]
                if business_id in store_user:
                    store_user[business_id].append(user_id)
                else:
                    store_user[business_id] = [user_id]
    utils.dump(store_user, "dicts/store_user.p")
    utils.dump(store_review, "dicts/store_review.p")
def find(symbols):
    print(utils.make_bright("<find>"))
    matches = []
    with sqlite3.connect(utils.get_libcs_db_filepath()) as conn:
        conn.row_factory = sqlite3.Row
        for libc in conn.execute("SELECT * FROM libcs"):
            libc_filepath = os.path.join(utils.get_libcs_dirpath(), libc["relpath"])
            with open(libc_filepath, "rb") as f:
                elf = elftools.elf.elffile.ELFFile(f)
                dynsym_section = elf.get_section_by_name(".dynsym")
                for symbol, address in symbols:
                    # Only the page offset (low 12 bits) survives ASLR, so
                    # compare addresses modulo 0x1000.
                    offset = address & 0xFFF
                    try:
                        libc_symbol = dynsym_section.get_symbol_by_name(symbol)[0]
                        libc_offset = libc_symbol.entry.st_value & 0xFFF
                        if libc_offset != offset:
                            break
                    except (IndexError, TypeError):
                        break
                else:
                    utils.dump(dict(libc))
                    matches.append(dict(libc))
    print(utils.make_bright("</find>"))
    return matches
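# Usage sketch for find(): pass (symbol, leaked address) pairs; only the page
# offset of each address is compared, so fully ASLR'd leaks work. The symbol
# names are real libc exports, but the addresses below are made up:
#
#   matches = find([("puts", 0x7ffff7a62970), ("gets", 0x7ffff7a614b0)])
#   for libc in matches:
#       print(libc["relpath"])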
def _log(self, query):
    """
    Saves only the last n (100) queries
    """
    if query not in self.queryLog:
        self.queryLog.append(query)
        dump(self.queryLog[-100:], "./obj/log.pk")
def pair_dist():
    # Compute the geographic distance (in miles) between every pair of stores
    # that appear together in store_pair.
    store = utils.load("dicts/store.p")
    store_pair = utils.load("dicts/store_pair.p")
    pair_d = {}
    for busi_1 in store_pair:
        for busi_2 in store_pair[busi_1]:
            # Order the ids so each unordered pair is keyed exactly once.
            small, large = (busi_1, busi_2) if busi_1 < busi_2 else (busi_2, busi_1)
            tup = (small, large)
            if tup in pair_d:
                continue
            first = (store[small]['latitude'], store[small]['longitude'])
            second = (store[large]['latitude'], store[large]['longitude'])
            pair_d[tup] = vincenty(first, second).miles
    utils.dump(pair_d, "dicts/pair_dist.p")
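# The `vincenty` call above comes from geopy; geopy >= 2.0 removed it in favor
# of `geodesic`, which is a drop-in replacement here. Quick self-contained
# check with made-up coordinates a few miles apart:
from geopy.distance import geodesic

_miles = geodesic((36.1699, -115.1398), (36.1024, -115.1745)).miles
assert 0 < _miles < 10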
def show_block(filename, offset):
    f = DatFile(filename)
    f.stream.seek(offset)
    block_data = f.stream.read(f.block_size)
    dump(block_data)
    print("---")
    dump(f.stream.read(0x40))
def preprocess(raw_data, dataset):
    print('parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': []}
    file_count = 0
    for section in ['train', 'valid']:
        all_smiles = []  # record all smiles in training dataset
        for i, (smiles, QED) in enumerate([(mol['smiles'], mol['QED'])
                                           for mol in raw_data[section]]):
            nodes, edges = to_graph(smiles, dataset)
            if len(edges) <= 0:
                continue
            processed_data[section].append({
                'targets': [[QED]],
                'graph': edges,
                'node_features': nodes,
                'smiles': smiles
            })
            all_smiles.append(smiles)
            if file_count % 2000 == 0:
                print('finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %%' % section)
        # save the dataset
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)
        # save all molecules in the training dataset
        if section == 'train':
            utils.dump('smiles_%s.pkl' % dataset, all_smiles)
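# Expected input shape for preprocess(), inferred from the loop above; the
# SMILES strings, QED scores, and dataset name are illustrative only:
#
#   raw_data = {
#       "train": [{"smiles": "CCO", "QED": 0.41}],
#       "valid": [{"smiles": "c1ccccc1", "QED": 0.44}],
#   }
#   preprocess(raw_data, "zinc")
#
# This writes molecules_train_zinc.json / molecules_valid_zinc.json, plus
# smiles_zinc.pkl for the training split.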
def preprocess(df):
    def token(text):
        st = LancasterStemmer()
        txt = nltk.word_tokenize(text.lower())
        return [st.stem(word) for word in txt]

    # Keep only speakers with more than 2000 lines.
    top_speakers = df.groupby([c.TARGET]).size().loc[df.groupby([c.TARGET]).size() > 2000]
    main_char_lines = df.loc[df[c.TARGET].isin(top_speakers.index.values)]
    main_char_lines['Line'] = [line.replace('\n', '') for line in main_char_lines['Line']]
    # stop = set(stopwords.words("english"))
    cv = CountVectorizer(
        # lowercase=True,
        tokenizer=token,
        # stop_words=stop,
        # token_pattern=u'(?u)\b\w\w+\b',
        analyzer=u'word',
        min_df=4)
    X = cv.fit_transform(main_char_lines['Line'].tolist()).toarray()
    le = LabelEncoder()
    y = le.fit_transform(main_char_lines[c.TARGET])
    u.dump(cv, c.PATH_VECTORIZER)
    u.dump(le, c.PATH_ENCODER)
    return X, y
def dump_anim_file(entry):
    j, unk1, file_id, offset, size1, timestamp, version, size2, unk2 = entry
    print("%08X %08X %08X %s %08X | %08X %08X %08X | %08X" % (
        file_id, offset, size1, time.ctime(timestamp), version,
        size2, unk1, unk2, size2 - size1))
    f.stream.seek(offset)
    j, k, l, m, n = struct.unpack("<LLLHH", f.stream.read(0x10))
    print("%08X %08X %08X %04X %04X" % (j, k, l, m, n))
    assert j == 0
    assert k == 0
    if m == 0xDA78:
        # 0x78 0xDA read as a little-endian uint16: a zlib header.
        print("compressed")
        assert unk1 % 0x100 == 0x03
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[12:]
        content = zlib.decompress(data)
        assert l == len(content)
    else:
        print("uncompressed")
        assert unk1 % 0x100 == 0x02
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[8:]
        content = data
    dump(content[:0x100])
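# Why `m == 0xDA78` means "compressed" above: a zlib stream at maximum
# compression starts with the bytes 0x78 0xDA, which read as a little-endian
# uint16 give 0xDA78. A self-contained check:
import struct
import zlib

_blob = zlib.compress(b"example payload", 9)
(_magic,) = struct.unpack("<H", _blob[:2])
assert _magic == 0xDA78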
def get_tasks(request):
    utils.dump(request.GET)
    # date = datetime.strptime("Tue Oct 20 09:26:38 GMT 2015", '%a %b %d %H:%M:%S %Z %Y')
    # latest_task_list = Task.objects.all().filter(date=date).order_by('-date')
    latest_task_list = Task.objects.all().filter(isDeleted=False).order_by('-date')
    response = utils.build_obj_from_queryset(latest_task_list)
    return JsonResponse(response, safe=False)
def get_tasks_by_date(request):
    utils.dump(request.GET)
    # TODO: get with param, not POST
    date_filter = datetime.strptime(request.POST.get('date'), '%a %b %d %H:%M:%S %Z %Y')
    latest_task_list = Task.objects.all().filter(
        isDeleted=False, date__startswith=date_filter.date()).order_by('-date')
    response = utils.build_obj_from_queryset(latest_task_list)
    return JsonResponse(response, safe=False)
def main(run_id, data_path, score_as_pyfunc, score_as_tensorflow_lite):
    print("Options:")
    for k, v in locals().items():
        print(f"  {k}: {v}")
    utils.dump(run_id)
    data, _, _, _ = utils.build_data(data_path)

    model_uri = f"runs:/{run_id}/keras-hd5-model"
    predict_keras(model_uri, data)
    if score_as_pyfunc:
        predict_pyfunc(model_uri, data, "keras-hd5-model")

    model_name = "tensorflow-model"
    if artifact_exists(run_id, model_name):
        predict_tensorflow_model(run_id, data)
    else:
        print(f"WARNING: no model '{model_name}'")

    if score_as_tensorflow_lite:
        model_name = "tensorflow-lite-model"
        if artifact_exists(run_id, model_name):
            predict_tensorflow_lite_model(run_id, data)
        else:
            print(f"WARNING: no model '{model_name}'")

    model_name = "onnx-model"
    if artifact_exists(run_id, model_name):
        # The artifact path must not be quoted inside the URI.
        model_uri = f"runs:/{run_id}/{model_name}"
        predict_onnx(model_uri, data)
        predict_pyfunc(model_uri, data, "onnx-model")
    else:
        print(f"WARNING: no model '{model_name}'")
async def main():
    call_args = get_args()
    config_env(call_args.random_seed, call_args.log_level)
    # Kaggle stores compressed files with ".zip" suffix
    dataset_file_path: Path = call_args.storage_path / (DATASET_FILE + ".zip")
    await load_dataset(
        ds_name=call_args.dataset_name,
        ds_file_name=DATASET_FILE,
        ds_file_path=dataset_file_path,
    )
    embeddings_arch = call_args.storage_path / "glove.840B.300d.zip"
    emb_file_path = await get_embeddings(emb_arch_path=embeddings_arch)

    # Check if preprocessing can be skipped
    checksum_key = (
        f"{dataset_file_path.name}_{DATASET_SIZE}_{emb_file_path.name}"
        f"_{call_args.train_size}_{call_args.random_seed}"
    )
    need_rerun = True
    if CHECKSUMS.get(checksum_key, None):
        need_rerun = False
        for f_name, checksum in CHECKSUMS.get(checksum_key).items():
            f_path = call_args.work_store_path / f_name
            if not file_exists(f_path) or not verify_checksum(f_path, checksum):
                need_rerun = True
                break
    if need_rerun:
        objects_to_save = preprocess_data(dataset_file_path, DATASET_SIZE,
                                          emb_file_path, call_args.train_size)
        dump(dump_root=call_args.work_store_path, objects=objects_to_save)
    else:
        logging.info("All checksums match run configuration, skipping preprocessing.")
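# `verify_checksum` is defined elsewhere and not shown in this file. A minimal
# sketch of what it plausibly does, assuming the stored checksums are SHA-256
# hex digests (the hash algorithm is an assumption):
import hashlib
from pathlib import Path


def _verify_checksum_sketch(f_path: Path, expected: str) -> bool:
    digest = hashlib.sha256()
    with f_path.open("rb") as fh:
        # Hash in 1 MiB chunks so large preprocessed files are not read at once.
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected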
def main():
    i = 0
    result = {}
    try:
        parse_categories()
        gen_embed_indexes()
        kv_array = specs.parse_files()
        while i < len(kv_array):
            print("----------------------------------------------")
            filename = kv_array[i]['filename']  # .lower().replace('.', '_')
            print("extracting key values from %s" % (kv_array[i]['filename']))
            result[filename] = {}
            result[filename]['application'] = []
            result[filename]['qos'] = []
            result[filename]['device'] = []
            for k, v in kv_array[i]['kv']:
                print("%s : %s" % (k, v))
                if v == "":
                    continue
                l = len(k.split())
                korig = k
                if l > 1:
                    # k = k.split()[l - 1]
                    k = utils.k_get(k.strip(), l)
                try:
                    scores = process(k.strip())
                    print(scores)
                    # Pick the category with the highest score.
                    max = 'device' if scores['device'] > scores['application'] else 'application'
                    max = max if scores['qos'] < scores[max] else 'qos'
                    print(max)
                    k = utils.k_strip(korig.strip(), k.strip())
                    if k is None:
                        continue
                    print("--------")
                    result[filename][max].append([k, v])
                except Exception as e:
                    print("Exception")
                    print(e)
            i = i + 1
            # NOTE: this break means only the first spec file is processed,
            # presumably left over from debugging.
            break
    except Exception as e:
        print("Exception")
        print(e)
    print("-------------")
    for f in result:
        print(f)
        for s in result[f]:
            out = s + " :"
            for k, v in result[f][s]:
                out = out + k + ":"
            # print(result[f][s])
            utils.dump(out)
            print("---")
        create_excel_sheet(f, result[f])
def loadHead(self, stream):
    print("=== HEAD ===")
    head = GxStream(stream.getNext())
    head.skip(2)  # 40 00 - HEAD size
    s = head.getNext()
    while s != "":
        print(utils.dump(s))
        s = head.getNext()
    stream.getNext()  # 04 00 FF FF
async def add_line(line: c.Line):
    print(line)
    extra_lines = pd.DataFrame([line.dict()])
    global df
    print(df.shape)
    df = pd.concat([df, extra_lines], axis=0, ignore_index=True)
    print(df.shape)
    u.dump(df, c.PATH_TRAIN_EXTRA)
    return line
def show_file_block(filename, offset):
    f = DatFile(filename)
    f.stream.seek(offset)
    block_data = f.stream.read(f.block_size)
    zero1, zero2, file_id, size = struct.unpack("<LLLL", block_data[:0x10])
    assert zero1 == 0
    assert zero2 == 0
    print("%08X %08X" % (file_id, size))
    file_data = f.stream.read(size)
    dump(file_data[0x10:])
def test_formbot():
    model_directory = "examples/formbot/models/nlu/current"
    interpreter = Interpreter.load(model_directory)

    text = "uh yes"
    result = interpreter.parse(text)
    utils.dump(result)

    text = "what about chinese food"
    result = interpreter.parse(text)
    utils.dump(result)
def load(self):
    # Load cards
    self.cards = load(self.filename("cards")) or []
    self.ui.cards = self.cards[:]

    # Load secret key
    self.secret_key = load(self.filename("secret_key")) or generate_secret_key()
    self.secret_key = int(self.secret_key)
    dump(self.filename("secret_key"), self.secret_key)
def dump_image_file(entry):
    j, unk1, file_id, offset, size1, timestamp, version, size2, unk2 = entry
    f.stream.seek(offset)
    j, k, l, m, n = struct.unpack("<LLLHH", f.stream.read(0x10))
    assert j == 0
    assert k == 0
    if m == 0xDA78:
        # zlib-compressed payload (0x78 0xDA header read little-endian)
        assert unk1 % 0x100 == 0x03
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[12:]
        content = zlib.decompress(data)
        assert l == len(content)
        header_id, unk1, width, height, unk2, lngth = struct.unpack("<LLLLLL", content[:24])
        assert lngth + 24 == l
    else:
        assert unk1 % 0x100 == 0x02
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[8:]
        header_id, unk1, width, height, unk2, lngth = struct.unpack("<LLLLLL", data[:24])
        assert lngth + 24 == size1
        content = data
    print(hex(file_id))
    if unk2 == 0x15:
        # 32-bit RGBA: 4 bytes per pixel
        assert width * height * 4 == lngth
        image_0x15(header_id, width, height, content[24:])
    elif unk2 == 0x14:
        # 24-bit RGB: 3 bytes per pixel
        assert width * height * 3 == lngth
        image_0x14(header_id, width, height, content[24:])
    elif unk2 == 0x31545844:  # DXT1: 8 bytes per 4x4 block, half a byte per pixel
        assert width * height == lngth * 2
        image_0x31545844(header_id, width, height, content[24:])
    elif unk2 == 0x33545844:  # DXT3: 16 bytes per 4x4 block, one byte per pixel
        assert width * height == lngth
        image_0x33545844(header_id, width, height, content[24:])
    elif unk2 == 0x35545844:  # DXT5: 16 bytes per 4x4 block, one byte per pixel
        assert width * height == lngth
        image_0x35545844(header_id, width, height, content[24:])
    elif unk2 == 0x1C:
        assert width * height == lngth
        image_0x1C(header_id, width, height, content[24:])
    elif unk2 == 0x1F4:
        image_0x1F4(header_id, content[24:])
    else:
        print("%08X %04X %04X" % (l, m, n))
        print("%08s %08s %08s %08s %08s %08s" % ("file_id", "unk1", "width", "height", "unk2", "lngth"))
        print("%08X %08X %08X %08X %08X %08X" % (header_id, unk1, width, height, unk2, lngth))
        dump(content[24:])
def params(ctx):
    name = ctx['name']
    build_root = ctx['build_root']
    win_root = f'{build_root}/win'
    win_exe = f'{win_root}/{name}.exe'
    old = {k for k in ctx}
    ctx['win_root'] = win_root
    ctx['win_exe'] = win_exe
    dump(ctx, old)
def train_and_validate(model, X, y):
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.3, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    # classification_report expects (y_true, y_pred) in that order
    print(classification_report(y_val, y_pred))
    # Refit on the full dataset before persisting.
    model.fit(X, y)
    u.dump(model, c.PATH_MODEL)
    return model
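# Usage sketch tying preprocess() and train_and_validate() together; the model
# choice is an assumption, any sklearn classifier with fit/predict would do:
#
#   from sklearn.linear_model import LogisticRegression
#   X, y = preprocess(df)
#   model = train_and_validate(LogisticRegression(max_iter=1000), X, y)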
async def add_lines(lines: List[c.Line]):
    print(lines)
    extra_lines = [line.dict() for line in lines]
    global df
    print(df.shape)
    df = pd.concat([df, pd.DataFrame(extra_lines)], axis=0, ignore_index=True)
    print(df.shape)
    u.dump(df, c.PATH_TRAIN_EXTRA)
    # Return all added lines, not just the last loop variable.
    return lines
def eval(test_loader, model, device):
    mse = torch.nn.MSELoss()
    mae = torch.nn.L1Loss()
    with torch.no_grad():
        rmse_loss = 0
        mae_loss = 0
        mre_loss = 0
        delta1_loss = 0
        delta2_loss = 0
        delta3_loss = 0
        for i, (img, depth) in enumerate(test_loader):
            img, depth = img.to(device).float(), depth.to(device).float()
            depth = depth.unsqueeze(1)
            output = model(img)
            # Only evaluate pixels with a valid ground-truth depth.
            valid_mask = depth > 0
            depth = depth[valid_mask]
            output = output[valid_mask]
            rmse_loss += mse(output, depth) * test_loader.batch_size
            mae_loss += mae(output, depth) * test_loader.batch_size
            mre_loss += mre(output, depth) * test_loader.batch_size
            delta1_loss += delta(output, depth, 1) * test_loader.batch_size
            delta2_loss += delta(output, depth, 2) * test_loader.batch_size
            delta3_loss += delta(output, depth, 3) * test_loader.batch_size
            image, depth_gt, depth_pred = resize_image_depth(img, depth, output)
            _, error_map = make_error_map(image, depth_gt.T, depth_pred.T)
            dump(image=image, depth=depth_pred.T, depth_gt=depth_gt.T,
                 error_map=error_map, prefix='eval', n=i)
        N = len(test_loader) * test_loader.batch_size
        rmse_loss = torch.sqrt(rmse_loss / N)
        mae_loss = mae_loss / N
        mre_loss = mre_loss / N
        delta1_loss = delta1_loss / N
        delta2_loss = delta2_loss / N
        delta3_loss = delta3_loss / N
        print('RMSE: %f' % (rmse_loss,))
        print('MAE: %f' % (mae_loss,))
        print('MRE: %f' % (mre_loss,))
        print('Delta1: %f' % (delta1_loss,))
        print('Delta2: %f' % (delta2_loss,))
        print('Delta3: %f' % (delta3_loss,))
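# `mre` and `delta` are helpers defined elsewhere. Hedged sketches of the
# standard depth-estimation metrics they presumably implement:
import torch


def _mre_sketch(pred, target):
    # Mean relative error: |pred - gt| / gt, averaged over valid pixels.
    return torch.mean(torch.abs(pred - target) / target)


def _delta_sketch(pred, target, i):
    # Threshold accuracy: fraction of pixels where
    # max(pred/gt, gt/pred) < 1.25 ** i.
    ratio = torch.max(pred / target, target / pred)
    return torch.mean((ratio < 1.25 ** i).float())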
def params(ctx):
    name = ctx['name']
    build_root = ctx['build_root']
    osx_root = f'{build_root}/osx'
    osx_dmg = f'{osx_root}/{name}.dmg'
    osx_app = f'{osx_root}/{name}.app'
    old = {k for k in ctx}
    ctx['osx_root'] = osx_root
    ctx['osx_dmg'] = osx_dmg
    ctx['osx_app'] = osx_app
    dump(ctx, old)
def add_steam_appid_txt(ctx):
    """Add steam_appid.txt for testing."""
    win_root = ctx['win_root']
    steam_appid = ctx['steam_appid']
    win_steam_appid_txt = f'{win_root}/steam_appid.txt'
    with open(win_steam_appid_txt, 'wt') as f:
        f.write(f'{steam_appid}')
    old = {k for k in ctx}
    ctx['win_steam_appid_txt'] = win_steam_appid_txt
    dump(ctx, old)
def add_steam_appid_txt(ctx):
    """Add steam_appid.txt for testing."""
    osx_app = ctx['osx_app']
    steam_appid = ctx['steam_appid']
    osx_steam_appid_txt = f'{osx_app}/Contents/MacOS/steam_appid.txt'
    with open(osx_steam_appid_txt, 'wt') as f:
        f.write(f'{steam_appid}')
    old = {k for k in ctx}
    ctx['osx_steam_appid_txt'] = osx_steam_appid_txt
    dump(ctx, old)
def train(self, train_set: pd.DataFrame, force: bool = False, save: bool = True) -> None:
    if not force and self.LogReg_pipeline is not None:
        return
    self.LogReg_pipeline = Pipeline([
        ('tfidf', TfidfVectorizer(stop_words=self.stop_words)),
        ('clf', LogisticRegression(solver='sag')),
    ])
    self.LogReg_pipeline.fit(
        train_set['comment_text'].map(lambda com: utils.preprocess_text(com)),
        train_set['bannable'])
    if save:
        utils.dump(self.LogReg_pipeline, "log_pipeline")
def predict(word_preds, stop_words=(), k=None, t='fr', query=False, lang=False,
            save_pred=None, load_pred=None):
    if load_pred:
        word_preds = load(load_pred)
    else:
        word_preds = list(map(list, word_preds))
        for i in range(len(word_preds)):
            preds = list(filter(has_letter, map(clean, word_preds[i][1][:k])))
            if stop_words:
                preds = [w for w in preds if w not in stop_words]
            if query:  # filter src word
                clean_q = clean(word_preds[i][0])
                preds = [p for p in preds if p != clean_q]
            if lang:  # filter by target language
                preds = list(filter(is_lang(t), preds))
            word_preds[i][1] = preds
    if save_pred:
        dump(word_preds, save_pred)
    pred_words_matrix = [
        list(preds_to_words(wrd_pred, stop_words))
        for _, wrd_pred in word_preds
    ]
    # src-word i -> (tfidf-word scores, tf-idf ngram scores)
    tfidf_results = tfidf_word_feats(pred_words_matrix)
    for (wrd, wrd_pred), (tfidf_words, tfidf_ngs) in \
            zip(word_preds, tfidf_results):
        words_w_ranks = list(preds_to_words(wrd_pred, stop_words, rank=True))
        yield word_row_feats(
            wrd,
            words_w_ranks,
            t,
            tfidf_words=tfidf_words,
            tfidf_ngrams=tfidf_ngs,
        )
def user():
    user = {}
    with open("dataset/yelp_academic_dataset_user.json", "r") as f:
        for line in f:
            line = json.loads(line)
            user_id = line['user_id']
            user[user_id] = {}
            # user[user_id]['review_cnt'] = line['review_count']
            user[user_id]['yelp_since'] = line['yelping_since']
            # user[user_id]['friends_cnt'] = len(line['friends'])
            # user[user_id]['fans'] = line['fans']
            # user[user_id]['elite_year_cnt'] = len(line['elite'])
            user[user_id]['elite'] = line['elite']
            user[user_id]['avg_stars'] = line['average_stars']
    utils.dump(user, "dicts/user.p")
def insert(cls, *instances):
    """ Insert fact instances (overridden to handle Dimensions correctly) """
    if instances:
        columns = [column for column in cls.__columns__
                   if not isinstance(column, AutoColumn)]
        sql = "%s INTO %s (\n %s\n)\n" % (
            cls.INSERT, escaped(cls.__tablename__),
            ",\n ".join(escaped(column.name) for column in columns))
        batches = cls.batch(instances)
        for iteration, batch in enumerate(batches, start=1):
            log.debug('Inserting batch %s' % iteration,
                      extra={"table": cls.__tablename__})
            insert_statement = sql
            link = "VALUES"
            for instance in batch:
                values = []
                for column in columns:
                    value = instance[column.name]
                    if isinstance(column, DimensionKey):
                        if not value and column.optional:
                            values.append(dump(value))
                        else:
                            values.append(
                                "(%s)" % column.dimension.__subquery__(
                                    value,
                                    # TODO: this is a bit messy - shouldn't have
                                    # to pass the instance back in.
                                    instance.__dimension_selector__.timestamp(instance)
                                )
                            )
                    else:
                        values.append(dump(value))
                insert_statement += link + (" (\n %s\n)" % ",\n ".join(values))
                link = ","
            connection = Warehouse.get()
            try:
                with closing(connection.cursor()) as cursor:
                    cursor.execute(insert_statement)
            except Exception as e:
                classify_error(e)
                log.error(e)
                log.error(insert_statement)
                connection.rollback()
            else:
                connection.commit()
def search_list(self, page=1, page_size=10, query_params=None, sort_params=None,
                pager_flag=True):
    # Use None sentinels: mutable default arguments are shared across calls,
    # so updating them in place (as the old sort_params.update did) leaks
    # state between requests.
    query_params = query_params or {}
    sort_params = sort_params or {"add_time": -1}
    coll = self.get_coll()
    if pager_flag:
        length = coll.find(query_params).count()
        pager = utils.count_page(length, page, page_size)
        cr = coll.aggregate([
            {"$match": query_params},
            {"$sort": sort_params},
            {"$skip": pager['skip']},
            {"$limit": pager['page_size']}])
    else:
        pager = utils.count_page(0, page, page_size)
        cr = coll.aggregate([
            {"$match": query_params},
            {"$sort": sort_params}])
    objs = []
    for obj in cr:
        obj = utils.dump(obj)
        try:
            for extra_param in self.extra_params:
                exec("""obj[extra_param] = self.get_%s(obj)""" % extra_param)
            objs.append(obj)
        except:
            objs.append(obj)
    return objs, pager
def prepare_fullha(config, ha_file):
    """ Prepare user.full_ha.file """
    conf = yaml.load(ha_file)
    net_ip = ".".join(config['servers']['control-servers'][0]['ip'].split(".")[:3])
    vipc = net_ip + ".253"
    conf["coe::base::controller_hostname"] = "control-server"
    conf["horizon::keystone_url"] = change_ip_to(conf["horizon::keystone_url"], vipc)
    conf["controller_names"] = [c["hostname"] for c in config['servers']['control-servers']]
    conf["openstack-ha::load-balancer::controller_ipaddresses"] = [
        c["ip"] for c in config['servers']['control-servers']]
    conf["openstack-ha::load-balancer::swift_proxy_ipaddresses"] = [
        c["ip"] for c in config['servers']['swift-proxy']]
    conf["openstack-ha::load-balancer::swift_proxy_names"] = [
        c["hostname"] for c in config['servers']['swift-proxy']]
    vipsw = net_ip + ".252"
    conf["openstack::swift::proxy::swift_proxy_net_ip"] = "%{ipaddress_eth2}"
    conf["openstack::swift::proxy::swift_memcache_servers"] = [
        i["ip"] + ":11211" for i in config['servers']['swift-proxy']]
    conf["nova::memcached_servers"] = [
        i["ip"] + ":11211" for i in config['servers']['control-servers']]
    conf["rabbit_hosts"] = [
        i["hostname"] + ":5672" for i in config['servers']['control-servers']]
    conf["galera::galera_servers"] = [c["ip"] for c in config['servers']['control-servers']]
    conf["galera::galera_master"] = (
        config['servers']['control-servers'][0]["hostname"] + "." + DOMAIN_NAME)
    conf["galera_master_name"] = config['servers']['control-servers'][0]["hostname"]
    conf["galera_master_ipaddress"] = config['servers']['control-servers'][0]["ip"]
    conf["galera_backup_names"] = [
        i["hostname"] for i in config['servers']['control-servers'][1:]]
    conf["galera_backup_ipaddresses"] = [
        i["ip"] for i in config['servers']['control-servers'][1:]]
    conf["openstack::swift::storage-node::storage_devices"] = ["vdb", "vdc", "vdd"]
    return dump(conf)
def create(self, **obj):
    coll = self.get_coll()
    curr_time = datetime.datetime.now()
    # Round-trip through JSON to normalize values before insertion.
    obj = json.loads(json.dumps(obj))
    obj["add_time"] = str(curr_time)
    coll.insert_one(obj)
    return utils.dump(obj)
def __subquery__(cls, value, timestamp):
    """ Return a SQL SELECT query to use as a subquery within a fact INSERT.
    Does not append parentheses or a LIMIT clause.
    """
    value_type = type(value)
    # We also check for subclasses for situations like basestring, which
    # matches on either str or unicode.
    natural_keys = [key for key in cls.__naturalkeys__
                    if (key.type is value_type or issubclass(value_type, key.type))]
    if not natural_keys:
        raise ValueError("Value type '%s' does not match type of any "
                         "natural key for dimension "
                         "'%s'" % (value_type.__name__, cls.__name__))
    sql_template = (
        'SELECT {primary_key} FROM {table_name} '
        'WHERE {selector} '
        'AND `applicable_from` = (SELECT max(`applicable_from`) '
        'FROM {table_name} '
        'WHERE {selector} AND `applicable_from` <= "{timestamp}")'
    )
    sql = sql_template.format(
        primary_key=escaped(cls.__primarykey__.name),
        table_name=escaped(cls.__tablename__),
        selector=" OR ".join("%s = %s" % (escaped(key.name), dump(value))
                             for key in natural_keys),
        timestamp=timestamp
    )
    return sql
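# Illustration of the SQL this method emits, for a hypothetical `Size`
# dimension with primary key `id`, natural key `code`, value "XL", and
# timestamp "2020-01-01 00:00:00" (all of these names are made up):
#
#   SELECT `id` FROM `dim_size` WHERE `code` = 'XL'
#   AND `applicable_from` = (SELECT max(`applicable_from`) FROM `dim_size`
#   WHERE `code` = 'XL' AND `applicable_from` <= "2020-01-01 00:00:00")
#
# i.e. it selects the most recent dimension row applicable at the fact's timestamp.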
def erb():
    working_dir = get_working_dir()
    log.setup(logging.DEBUG, path=working_dir)
    window_size = default_input('Window size', 3)
    n_nodes = default_input('N Nodes', 100)
    connectivity = default_input('Connectivity', 2)
    f = default_input('From', 0)
    t = default_input('To', n_nodes + 1)
    # Integer division: range() rejects a float step under Python 3.
    s = default_input('Step', n_nodes // 10)
    r = range(f, t, s)
    distribution = estimate_reservoir_distribution(
        30, n_nodes, connectivity, r, window_size)
    name = '[NN:{}-WS:{}-K:{}]-distribution'.format(n_nodes, window_size, connectivity)
    dump(distribution, name, folder=working_dir)
def update(self, query_params, update_params):
    coll = self.get_coll()
    obj = coll.find_one(query_params)
    if obj:
        obj.update(update_params)
        ret = coll.save(obj)
    else:
        obj = {}
    return utils.dump(obj)
def affective(video_path, video_file, output_path):
    audio_path = output_path
    audio_file = video_file + ".wav"
    ret = extract_audio(video_path, video_file, audio_path, audio_file)
    if ret == 0:  # normal return
        A, a = arousal.get_arousal(video_path, video_file, audio_path, audio_file)
        v = valence.get_valence(audio_path, audio_file, A, a)
        utils.dump(output_path, video_file + "_arousal.txt", a)
        utils.dump(output_path, video_file + "_valence.txt", v)
        utils.dump2(output_path + "/final/", video_file + "_final.txt", v, a)
        plot_data.plot_data_pyplot(output_path + "/final/" + video_file + "_final.txt", v, a)
        try:
            os.remove(audio_path + audio_file)
        except OSError:
            print("Warning: file not removed")
    print("Finished")
def search(self, query_params):
    coll = self.get_coll()
    obj = coll.find_one(query_params)
    obj = utils.dump(obj)
    try:
        for extra_param in self.extra_params:
            exec("""obj[extra_param] = self.get_%s(obj)""" % extra_param)
    except:
        pass
    return obj
def expression(self):
    s = [escaped(self.name), self.type_expression]
    if not self.optional:
        s.append("NOT NULL")
    default_expression = self.default_clause
    if default_expression:
        s.append(default_expression)
    if self.comment:
        s.append("COMMENT %s" % dump(self.comment))
    return " ".join(s)
async def send_dispatch_event(self, event_type, guild, before=None, after=None):
    e = dict(ts=time(), type=event_type, producer=str(self), guild=dump(guild))
    if before:
        if after:
            e['before'] = dump(before)
            e['after'] = dump(after)
        else:
            e['data'] = dump(before)
    self.log("{event}:{gid} @ {ts}".format(event=e['type'],
                                           gid=e['guild']['id'],
                                           ts=e['ts']))
    await self.send('discord.events.{}'.format(e['type']), e)
def view_or_basicauth(view, request, *args, **kwargs):
    # Check for valid basic auth header
    utils.dump(request)
    if 'HTTP_AUTHORIZATION' in request.META:
        auth = request.META['HTTP_AUTHORIZATION'].split()
        if len(auth) == 2:
            if auth[0].lower() == "basic":
                # Decode to str and split on the first colon only, so
                # passwords may themselves contain ':'.
                uname, passwd = base64.b64decode(auth[1]).decode().split(':', 1)
                user = authenticate(username=uname, password=passwd)
                if user is not None and user.is_active:
                    request.user = user
                    return view(request, *args, **kwargs)

    # Either they did not provide an authorization header or
    # something in the authorization attempt failed. Send a 401
    # back to them to ask them to authenticate.
    response = HttpResponse()
    response.status_code = 401
    response['WWW-Authenticate'] = 'Basic realm="%s"' % "wtf"
    return response
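# Self-contained round trip of the header format this view parses
# (credentials illustrative):
import base64

_header = "Basic " + base64.b64encode(b"alice:s3cret").decode()
_scheme, _b64 = _header.split()
assert _scheme.lower() == "basic"
_uname, _passwd = base64.b64decode(_b64).decode().split(":", 1)
assert (_uname, _passwd) == ("alice", "s3cret")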
def insert(cls, *instances):
    """ Insert one or more instances into the table as records. """
    if instances:
        columns = [column for column in cls.__columns__
                   if not isinstance(column, AutoColumn)]
        sql = "%s INTO %s (\n %s\n)\n" % (
            cls.INSERT, escaped(cls.__tablename__),
            ",\n ".join(escaped(column.name) for column in columns))
        batches = cls.batch(instances)
        for iteration, batch in enumerate(batches, start=1):
            log.debug('Inserting batch %s' % iteration,
                      extra={"table": cls.__tablename__})
            insert_statement = sql
            link = "VALUES"
            for instance in batch:
                values = []
                for column in columns:
                    value = instance[column.name]
                    values.append(dump(value))
                insert_statement += link + (" (\n %s\n)" % ",\n ".join(values))
                link = ","
            # Retry once with a fresh connection if the first attempt hits a
            # broken pipe.
            for i in range(1, 3):
                connection = Warehouse.get()
                try:
                    cursor = connection.cursor()
                    cursor.execute(insert_statement)
                    cursor.close()
                except Exception as e:
                    classify_error(e)
                    if e.__class__ == BrokenPipeError and i == 1:
                        log.info('Trying once more with a fresh connection',
                                 extra={"table": cls.__tablename__})
                        connection.close()
                    else:
                        log.error(e)
                        return
                else:
                    connection.commit()
                    break
        log.debug('Finished updating %s' % cls.__tablename__,
                  extra={"table": cls.__tablename__})
def _main(opts, args):
    level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(format=FORMAT, level=level)
    if opts.quiet:
        logging.disable(logging.CRITICAL)
    logging.info('Imagy started')
    logging.debug(list(map(str, (args, opts))))
    if not opts.memorystore:
        store_path = opts.store_path
        if store_path is None:
            store_path = imagy_at_home = path('~').expanduser().joinpath(IMAGY_DIR_NAME)
            snippet = (' and backup files' if config.KEEP_ORIGINALS else '')
            msg = ('Using %s to store configuration%s, you can modify this '
                   'path in config.py under STORE_PATH')
            logging.info(msg, imagy_at_home, snippet)
        store.load(store_path)
    args = [path(arg) for arg in args or FILE_PATTERNS if arg]
    run_daemon = opts.run
    if opts.clear:
        clear()
    elif opts.dump:
        dump(store)
    elif opts.revert:
        revert()
    elif opts.list:
        list_files()
    elif opts.files:
        do_files(*args)
    elif opts.deloriginals:
        delete_originals()
    elif opts.version:
        version()
    else:
        run_daemon = True
    if run_daemon:
        # if nothing specified so far, just run `smart mode` i.e. initialize the
        # directories and then run the daemon afterwards
        if not opts.no_init:
            initialize(*args)
        if not opts.no_watch:
            watch.watcher.run(*args)
def prepare_role(config, role_file):
    """ Prepare role_mappings file """
    roles = {config["servers"]["build-server"]["hostname"]: "build"}
    for c in config["servers"]["control-servers"]:
        roles[c["hostname"]] = "controller"
    for c in config["servers"]["compute-servers"]:
        roles[c["hostname"]] = "compute"
    for c in config["servers"]["swift-storage"]:
        roles[c["hostname"]] = "swift_storage"
    for c in config["servers"]["swift-proxy"]:
        roles[c["hostname"]] = "swift_proxy"
    for c in config["servers"]["load-balancer"]:
        roles[c["hostname"]] = "load_balancer"
    return dump(roles)
def cache(url, path):
    print('Downloading: %s' % urlparse.urlsplit(url).path)
    page = requests.get(url, proxies=PROX)
    if page.status_code == 404:
        return {'url': url, 'uid': 'Error: 404', 'time': gmt_now()}, []
    assert page.status_code == 200, \
        'Error: Status code error in page download, received %s @ %s' % (
            page.status_code, urlparse.urlsplit(url).path)
    tree = html.fromstring(page.content)
    links = list_links([str(x) for x in tree.xpath(r'//*[@href]/@href')], url=url)
    uid = dump(html.tostring(tree), path, verbose=True)
    pause()
    return {'url': url, 'uid': uid, 'time': gmt_now()}, links
def execute(cls, **params):
    database = getattr(cls, "database")
    query = getattr(cls, "query").format(
        **{key: dump(value) for key, value in params.items()})
    with NamedConnection(database) as connection:
        with closing(connection.cursor(dictionary=True)) as cursor:
            cursor.execute(query)
            rows = []
            for row in cursor:
                # Dump the rows immediately into memory, otherwise
                # the connection might timeout.
                rows.append(row)
    for row in rows:
        yield row
def prepare_common(config, common_file):
    """ Prepare user.common.file """
    conf = yaml.load(common_file)
    net_ip = ".".join(config['servers']['control-server'][0]['ip'].split(".")[:3])
    vipc = net_ip + ".253"
    conf["controller_public_address"] = vipc
    conf["controller_admin_address"] = vipc
    conf["controller_internal_address"] = vipc
    conf["coe::base::controller_hostname"] = "control-server"
    conf["domain_name"] = "domain.name"
    conf["ntp_servers"] = ["ntp.esl.cisco.com"]
    conf["external_interface"] = "eth4"
    conf["nova::compute::vncserver_proxyclient_address"] = "%{ipaddress_eth0}"
    conf["build_node_name"] = "build-server"
    conf["controller_public_url"] = change_ip_to(conf["controller_public_url"], vipc)
    conf["controller_admin_url"] = change_ip_to(conf["controller_admin_url"], vipc)
    conf["controller_internal_url"] = change_ip_to(conf["controller_internal_url"], vipc)
    conf["cobbler_node_ip"] = config['servers']['build-server'][0]['ip']
    conf["node_subnet"] = ".".join(conf["cobbler_node_ip"].split(".")[:3]) + ".0"
    conf["node_gateway"] = ".".join(conf["cobbler_node_ip"].split(".")[:3]) + ".1"
    vipsw = net_ip + ".252"
    conf["swift_internal_address"] = vipsw
    conf["swift_public_address"] = vipsw
    conf["swift_admin_address"] = vipsw
    conf["swift_proxy_net_ip"] = "%{ipaddress_eth0}"
    conf['mysql::server::override_options']['mysqld']['bind-address'] = "0.0.0.0"
    # config['servers']['control-server'][0]['ip']
    conf['swift_storage_interface'] = "eth0"
    conf['swift_local_net_ip'] = "%{ipaddress_eth0}"
    conf['internal_ip'] = "%{ipaddress_eth0}"
    conf['public_interface'] = "eth0"
    conf['private_interface'] = "eth0"
    conf['install_drive'] = "/dev/vda"
    conf['mon_initial_members'] = config['servers']['control-server'][0]["hostname"]
    conf['ceph_primary_mon'] = config['servers']['control-server'][0]["hostname"]
    conf['ceph_monitor_address'] = config['servers']['control-server'][0]["ip"]
    conf['ceph_cluster_interface'] = "eth0"
    conf['ceph_cluster_network'] = net_ip + ".0/24"
    conf['ceph_public_interface'] = "eth0"
    conf['ceph_public_network'] = net_ip + ".0/24"
    return dump(conf)
def cache_update(obj, path, links=[]):
    # headers = {'If-Modified-Since': obj['time']}
    headers = {}
    print('Checking: %s' % urlparse.urlsplit(obj['url']).path)
    page = requests.get(obj['url'], proxies=PROX, headers=headers)
    if page.status_code == 304:
        print('No changes made...')
    else:
        assert page.status_code == 200, \
            'Error: Status code error in page download, received %s @ %s' % (
                page.status_code, urlparse.urlsplit(obj['url']).path)
        tree = html.fromstring(page.content)
        uid = dump(html.tostring(tree), path, verbose=True)
        links = list_links([str(x) for x in tree.xpath(r'//*[@href]/@href')], url=obj['url'])
        obj['time'] = gmt_now()
    return obj, links
def list(self, query_list, sort_list, use_pager=True, is_origin=False,
         page=1, page_size=options.page_size):
    length = self.get_coll().find(query_list).count()
    if use_pager:
        pager = utils.count_page(length, page, page_size)
        # Local renamed from `list` to avoid shadowing the builtin.
        cursor = self.get_coll().aggregate([
            {"$match": query_list},
            {"$sort": sort_list},
            {"$skip": pager['skip']},
            {"$limit": pager['page_size']}])
    else:
        pager = []
        cursor = self.get_coll().aggregate([
            {"$match": query_list},
            {"$sort": sort_list}])
    if is_origin:
        return cursor, pager
    else:
        return utils.dump(cursor), pager
def prepare_new_files(config, path, use_sudo_flag):
    """ Prepare hostname specific files in puppet/data/hiera_data/hostname """

    def write(text, path, filename, sudo):
        fd = StringIO(text)
        warn_if_fail(put(fd, os.path.join(path, filename), use_sudo=sudo))
        warn_if_fail(put(fd, os.path.join(path, filename.replace("-", "_")), use_sudo=sudo))

    for compute in config["servers"]["compute-servers"]:
        file_name = compute["hostname"] + ".yaml"
        ceph = {}
        ceph["cephdeploy::has_compute"] = True
        ceph["cephdeploy::osdwrapper::disks"] = ["vdb", "vdc", "vdd"]
        write(dump(ceph), path, file_name, use_sudo_flag)
    for num, lb in enumerate(config["servers"]["load-balancer"]):
        if num == 0:
            lb_text = ("openstack-ha::load-balancer::controller_state: MASTER\n"
                       "openstack-ha::load-balancer::swift_proxy_state: BACKUP\n")
        else:
            lb_text = ("openstack-ha::load-balancer::controller_state: BACKUP\n"
                       "openstack-ha::load-balancer::swift_proxy_state: MASTER\n")
        file_name = lb["hostname"] + ".yaml"
        write(lb_text, path, file_name, use_sudo_flag)
    for num, sw in enumerate(config["servers"]["swift-storage"]):
        sw_text = (
            'openstack::swift::storage-node::swift_zone: {num}\n'
            'coe::network::interface::interface_name: "%{{swift_storage_interface}}"\n'
            'coe::network::interface::ipaddress: "%{{swift_local_net_ip}}"\n'
            'coe::network::interface::netmask: "%{{swift_storage_netmask}}"\n'.format(num=num + 1)
        )
        file_name = sw["hostname"] + ".yaml"
        write(sw_text, path, file_name, use_sudo_flag)
    file_name = config["servers"]["build-server"]["hostname"] + ".yaml"
    b_text = "apache::default_vhost: true"
    write(b_text, path, file_name, use_sudo_flag)
    return word_counter


"""
Sample: Instantiate this directly and call whatever you want
"""
if __name__ == "__main__":
    toc = 'C:/data/books/game_of_thrones/A_Game_Of_Thrones_split_001.html'
    book = ebup_parser(table_of_contents=toc)
    overall = Counter()
    # Chapters start at index 3, presumably skipping front matter.
    for a in range(3, book.num_chapters):
        text = book.get_chapter_contents(a)
        current_chapter_words = get_word_counter(text)
        overall += current_chapter_words
    all_words = dict(overall.most_common(200))
    dump(all_words)