Example #1
    def __init__(self, output_path, prod_name, wisdom_update={}):
        """
        Initialize postprocessor with output parameters.

        :param output_path: path where postprocessing files are stored
        :param prod_name: name of manifest json file and prefix of all output files
        :param wisdom_update: an optional dictionary that maps variables to
                              modifications requested in their visualization wisdom
        """
        logging.info("Postprocessor: output_path=%s prod_name=%s" %
                     (output_path, prod_name))
        dump(wisdom_update, "Postprocessor: wisdom_update")
        self.output_path = output_path
        self.product_name = prod_name
        self.manifest = {}
        self.wisdom_update = wisdom_update

        # in case the manifest exists, load the existing version
        mf_path = os.path.join(output_path, prod_name + '.json')
        if osp.exists(mf_path):
            self.manifest = json.load(open(mf_path))
            logging.info('postprocessor: Loaded manifest at %s' % mf_path)
            # dump(self.manifest,"postprocessor: manifest")
        else:
            logging.info('postprocessor: manifest at %s does not exist yet' %
                         mf_path)
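One detail worth flagging in the signature above: a mutable default such as wisdom_update={} is created once and then shared by every call that relies on it. A common defensive variant (a sketch only, not the project's actual code) is:

    def __init__(self, output_path, prod_name, wisdom_update=None):
        # Use None as the sentinel so each instance gets its own fresh dict.
        self.wisdom_update = {} if wisdom_update is None else wisdom_update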
Example #2
    def on_receive_card(self, card):
        if card in self.cards:
            return

        self.cards.append(card)
        dump(self.filename("cards"), self.cards)
        self.ui.update_card(self.room_id, card)
Example #3
def store_user_review(observe_t=12):
    store_user = {}
    store_review = {}

    store = utils.load("dicts/store.p")

    with open("dataset/yelp_academic_dataset_review.json", "r") as f:
        for line in f:
            line = json.loads(line)
            business_id = line["business_id"]
            if business_id not in store:
                continue
            review_id = line["review_id"]
            user_id = line["user_id"]
            date = datetime.strptime(line["date"], "%Y-%m-%d")
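            # Keep only reviews posted within observe_t months (counted in
            # 30-day blocks) of the store's recorded start_t.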
            if int((date-store[business_id]["start_t"]).days/30) <= observe_t:
                if business_id in store_review:
                    store_review[business_id].append(review_id)
                else:
                    store_review[business_id] = [review_id]
                if business_id in store_user:
                    store_user[business_id].append(user_id)
                else:
                    store_user[business_id] = [user_id]

    utils.dump(store_user, "dicts/store_user.p")
    utils.dump(store_review, "dicts/store_review.p")
Example #4
def find(symbols):
    print(utils.make_bright("<find>"))

    matches = []
    with sqlite3.connect(utils.get_libcs_db_filepath()) as conn:
        conn.row_factory = sqlite3.Row
        for libc in conn.execute("SELECT * FROM libcs"):
            libc_filepath = os.path.join(utils.get_libcs_dirpath(), libc["relpath"])
            with open(libc_filepath, "rb") as f:
                elf = elftools.elf.elffile.ELFFile(f)
                dynsym_section = elf.get_section_by_name(".dynsym")
                for symbol, address in symbols:
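                    # Pages are 4 KiB aligned, so ASLR leaves the low 12 bits of an
                    # address unchanged; comparing address & 0xFFF with the candidate
                    # libc symbol's st_value & 0xFFF is enough to accept or reject it.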
                    offset = address & 0xFFF
                    try:
                        libc_symbol = dynsym_section.get_symbol_by_name(symbol)[0]
                        libc_offset = libc_symbol.entry.st_value & 0xFFF
                        if libc_offset != offset:
                            break
                    except (IndexError, TypeError):
                        break
                else:
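                    # The for/else branch runs only when no break fired above,
                    # i.e. every requested symbol matched this candidate libc.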
                    utils.dump(dict(libc))
                    matches.append(dict(libc))

    print(utils.make_bright("</find>"))
    return matches
Example #5
 def _log(self, query):
     """
     Saves only the last n (100) queries
     """
     if query not in self.queryLog:
         self.queryLog.append(query)
         dump(self.queryLog[-100:], "./obj/log.pk")
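Note that the method above only truncates what is written to disk; self.queryLog itself keeps growing in memory. If the in-memory log should also be capped at 100 entries, a small variant (a sketch, assuming the same dump helper) is:

    def _log(self, query):
        """Keep and persist only the most recent 100 queries."""
        if query not in self.queryLog:
            self.queryLog.append(query)
            del self.queryLog[:-100]  # trim the in-memory log as well
            dump(self.queryLog, "./obj/log.pk")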
Example #6
def pair_dist():

    store = utils.load("dicts/store.p")
    store_pair = utils.load("dicts/store_pair.p")

    pair_d = {}

    for busi_1 in store_pair:
        l = store_pair[busi_1]
        for busi_2 in l:
            if busi_1 < busi_2:
                small = busi_1
                large = busi_2
            else:
                small = busi_2
                large = busi_1
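            # Canonical (small, large) ordering so each unordered pair is
            # computed and stored only once.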
            tup = (small, large)
            if tup in pair_d:
                continue
            else:
                x1 = store[small]['latitude']
                y1 = store[small]['longitude']
                x2 = store[large]['latitude']
                y2 = store[large]['longitude']
                first = (x1, y1)
                second = (x2, y2)
                pair_d[tup] = vincenty(first, second).miles
    utils.dump(pair_d, "dicts/pair_dist.p")
Example #7
def show_block(filename, offset):
    f = DatFile(filename)
    f.stream.seek(offset)
    block_data = f.stream.read(f.block_size)
    dump(block_data)
    print "---"
    dump(f.stream.read(0x40))
Example #8
def preprocess(raw_data, dataset):
    print('parsing smiles as graphs...')
    processed_data = {'train': [], 'valid': []}

    file_count = 0
    for section in ['train', 'valid']:
        all_smiles = []  # record all smiles in training dataset
        for i, (smiles, QED) in enumerate([(mol['smiles'], mol['QED'])
                                           for mol in raw_data[section]]):
            nodes, edges = to_graph(smiles, dataset)
            if len(edges) <= 0:
                continue
            processed_data[section].append({
                'targets': [[(QED)]],
                'graph': edges,
                'node_features': nodes,
                'smiles': smiles
            })
            all_smiles.append(smiles)
            if file_count % 2000 == 0:
                print('finished processing: %d' % file_count, end='\r')
            file_count += 1
        print('%s: 100 %%      ' % (section))
        # save the dataset
        with open('molecules_%s_%s.json' % (section, dataset), 'w') as f:
            json.dump(processed_data[section], f)
        # save all molecules in the training dataset
        if section == 'train':
            utils.dump('smiles_%s.pkl' % dataset, all_smiles)
Example #9
def show_block(filename, offset):
    f = DatFile(filename)
    f.stream.seek(offset)
    block_data = f.stream.read(f.block_size)
    dump(block_data)
    print "---"
    dump(f.stream.read(0x40))
Example #10
def preprocess(df):
    def token(text):
        st = LancasterStemmer()
        txt = nltk.word_tokenize(text.lower())
        return [st.stem(word) for word in txt]

    top_speakers = df.groupby([c.TARGET]).size(
    ).loc[df.groupby([c.TARGET]).size() > 2000]

    main_char_lines = df.loc[df[c.TARGET].isin(
        top_speakers.index.values)]

    main_char_lines['Line'] = [line.replace(
        '\n', '') for line in main_char_lines['Line']]

    # stop = set(stopwords.words("english"))
    cv = CountVectorizer(  # lowercase=True,
        tokenizer=token,  # stop_words=stop, # token_pattern=u'(?u)\b\w\w+\b',
        analyzer=u'word', min_df=4)

    X = cv.fit_transform(main_char_lines['Line'].tolist()).toarray()

    le = LabelEncoder()
    y = le.fit_transform(main_char_lines[c.TARGET])

    u.dump(cv, c.PATH_VECTORIZER)
    u.dump(le, c.PATH_ENCODER)

    return X, y
Example #11
def dump_anim_file(entry):
    j, unk1, file_id, offset, size1, timestamp, version, size2, unk2 = entry
    print "%08X %08X %08X %s %08X | %08X %08X %08X | %08X" % (file_id, offset, size1, time.ctime(timestamp), version, size2, unk1, unk2, size2 - size1)
    
    f.stream.seek(offset)
    
    j, k, l, m, n = struct.unpack("<LLLHH", f.stream.read(0x10))
    print "%08X %08X %08X %04X %04X" % (j, k, l, m, n)
    
    assert j == 0
    assert k == 0
    
    if m == 0xDA78:
        print "compressed"
        assert unk1 % 0x100 == 0x03
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[12:]
        content = zlib.decompress(data)
        assert l == len(content)
    else:
        print "uncompressed"
        assert unk1 % 0x100 == 0x02
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[8:]
        content = data
    
    dump(content[:0x100])
Example #12
def get_tasks(request):
    utils.dump(request.GET)
    # date = datetime.strptime("Tue Oct 20 09:26:38 GMT 2015",  '%a %b %d %H:%M:%S %Z %Y')
    # latest_task_list = Task.objects.all().filter(date=date).order_by('-date')
    latest_task_list = Task.objects.all().filter(isDeleted=False).order_by('-date')
    response = utils.build_obj_from_queryset(latest_task_list)
    return JsonResponse(response, safe=False)
Example #13
def dump_anim_file(entry):
    j, unk1, file_id, offset, size1, timestamp, version, size2, unk2 = entry
    print "%08X %08X %08X %s %08X | %08X %08X %08X | %08X" % (file_id, offset, size1, time.ctime(timestamp), version, size2, unk1, unk2, size2 - size1)

    f.stream.seek(offset)

    j, k, l, m, n = struct.unpack("<LLLHH", f.stream.read(0x10))
    print "%08X %08X %08X %04X %04X" % (j, k, l, m, n)

    assert j == 0
    assert k == 0

    if m == 0xDA78:
        print "compressed"
        assert unk1 % 0x100 == 0x03
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[12:]
        content = zlib.decompress(data)
        assert l == len(content)
    else:
        print "uncompressed"
        assert unk1 % 0x100 == 0x02
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[8:]
        content = data

    dump(content[:0x100])
Example #14
def get_tasks_by_date(request):
    utils.dump(request.GET)
    # TODO: read the date from a GET query parameter instead of POST
    date_filter = datetime.strptime(request.POST.get('date'),  '%a %b %d %H:%M:%S %Z %Y')
    latest_task_list = Task.objects.all().filter(isDeleted=False, date__startswith=date_filter.date()).order_by('-date')
    response = utils.build_obj_from_queryset(latest_task_list)
    return JsonResponse(response, safe=False)
Example #15
def main(run_id, data_path, score_as_pyfunc, score_as_tensorflow_lite):
    print("Options:")
    for k, v in locals().items():
        print(f"  {k}: {v}")

    utils.dump(run_id)
    data, _, _, _ = utils.build_data(data_path)

    model_uri = f"runs:/{run_id}/keras-hd5-model"
    predict_keras(model_uri, data)
    if score_as_pyfunc:
        predict_pyfunc(model_uri, data, "keras-hd5-model")

    model_name = "tensorflow-model"
    if artifact_exists(run_id, model_name):
        predict_tensorflow_model(run_id, data)
    else:
        print(f"WARNING: no model '{model_name}'")

    if score_as_tensorflow_lite:
        model_name = "tensorflow-lite-model"
        if artifact_exists(run_id, model_name):
            predict_tensorflow_lite_model(run_id, data)
        else:
            print(f"WARNING: no model '{model_name}'")

    model_name = "onnx-model"
    if artifact_exists(run_id, model_name):
        model_uri = f"runs:/{run_id}/{model_name}"
        predict_onnx(model_uri, data)
        predict_pyfunc(model_uri, data, "onnx-model")
    else:
        print(f"WARNING: no model '{model_name}'")
Example #16
async def main():
    call_args = get_args()
    config_env(call_args.random_seed, call_args.log_level)

    # Kaggle stores compressed files with ".zip" suffix
    dataset_file_path: Path = call_args.storage_path / (DATASET_FILE + ".zip")
    await load_dataset(
        ds_name=call_args.dataset_name,
        ds_file_name=DATASET_FILE,
        ds_file_path=dataset_file_path,
    )

    embeddings_arch = call_args.storage_path / "glove.840B.300d.zip"
    emb_file_path = await get_embeddings(emb_arch_path=embeddings_arch)

    # Check if preprocessing could be skipped
    checksum_key = (
        f"{dataset_file_path.name}_{DATASET_SIZE}_{emb_file_path.name}_{call_args.train_size}_{call_args.random_seed}"
    )
    need_rerun = True
    if CHECKSUMS.get(checksum_key, None):
        need_rerun = False
        for f_name, checksum in CHECKSUMS.get(checksum_key).items():
            f_path = call_args.work_store_path / f_name
            if not file_exists(f_path) or not verify_checksum(f_path, checksum):
                need_rerun = True
                break
    if need_rerun:
        objects_to_save = preprocess_data(dataset_file_path, DATASET_SIZE, emb_file_path, call_args.train_size)
        dump(dump_root=call_args.work_store_path, objects=objects_to_save)
    else:
        logging.info("All checksums match the run configuration, skipping preprocessing.")
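The file_exists and verify_checksum helpers above belong to the surrounding project and are not shown here. Purely as an illustration of the kind of streaming check this logic relies on, a hypothetical helper might look like this (an assumption, not the project's implementation):

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hash a file in chunks so large archives never need to fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()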
Example #17
def get_tasks(request):
    utils.dump(request.GET)
    # date = datetime.strptime("Tue Oct 20 09:26:38 GMT 2015",  '%a %b %d %H:%M:%S %Z %Y')
    # latest_task_list = Task.objects.all().filter(date=date).order_by('-date')
    latest_task_list = Task.objects.all().filter(
        isDeleted=False).order_by('-date')
    response = utils.build_obj_from_queryset(latest_task_list)
    return JsonResponse(response, safe=False)
Example #18
def main():
    i = 0
    result = {}
    try:
        parse_categories()
        gen_embed_indexes()
        kv_array = specs.parse_files()
        while i < len(kv_array):
            print("----------------------------------------------")
            filename = kv_array[i]['filename']  # .lower().replace('.', '_')
            print("extracting key values from %s" % (kv_array[i]['filename']))
            result[filename] = {}
            result[filename]['application'] = []
            result[filename]['qos'] = []
            result[filename]['device'] = []

            for k, v in kv_array[i]['kv']:
                print("%s : %s" % (k, v))
                if v == "":
                    continue
                l = len(k.split())
                korig = k
                if l > 1:
                    #k = k.split()[l - 1]
                    k = utils.k_get(k.strip(), l)
                try:
                    scores = process(k.strip())
                    print(scores)
                    label = 'device' if scores['device'] > scores['application'] else 'application'
                    label = label if scores['qos'] < scores[label] else 'qos'
                    print(label)
                    k = utils.k_strip(korig.strip(), k.strip())
                    if k is None:
                        continue
                    print("--------")
                    result[filename][label].append([k, v])
                except Exception as e:
                    print("Exception")
                    print(e)
                    pass
            i = i + 1
            break

    except Exception as e:
        print("Exception")
        print(e)

    print("-------------")
    for f in result:
        print(f)
        for s in result[f]:
            out = s + "    :"
            for k, v in result[f][s]:
                out = out + k + ":"
            #print(result[f][s])
            utils.dump(out)
        print("---")
        create_excel_sheet(f, result[f])
Example #19
 def loadHead(self, stream):
     print "=== HEAD ==="
     head = GxStream(stream.getNext())
     head.skip(2)  # 40 00 - HEAD size
     s = head.getNext()
     while s != "":
         print utils.dump(s)
         s = head.getNext()
     stream.getNext()  # 04 00 FF FF
Example #20
def get_tasks_by_date(request):
    utils.dump(request.GET)
    # TODO: read the date from a GET query parameter instead of POST
    date_filter = datetime.strptime(request.POST.get('date'),
                                    '%a %b %d %H:%M:%S %Z %Y')
    latest_task_list = Task.objects.all().filter(
        isDeleted=False, date__startswith=date_filter.date()).order_by('-date')
    response = utils.build_obj_from_queryset(latest_task_list)
    return JsonResponse(response, safe=False)
Example #21
async def add_line(line: c.Line):
    print(line)
    extra_lines = pd.DataFrame([line.dict()])

    global df
    print(df.shape)
    df = pd.concat([df, extra_lines], axis=0, ignore_index=True)
    print(df.shape)
    u.dump(df, c.PATH_TRAIN_EXTRA)
    return line
Example #22
def show_file_block(filename, offset):
    f = DatFile(filename)
    f.stream.seek(offset)
    block_data = f.stream.read(f.block_size)
    zero1, zero2, file_id, size = struct.unpack("<LLLL", block_data[:0x10])
    assert zero1 == 0
    assert zero2 == 0
    print("%08X %08X" % (file_id, size))
    file_data = f.stream.read(size)
    dump(file_data[0x10:])
Example #23
def show_file_block(filename, offset):
    f = DatFile(filename)
    f.stream.seek(offset)
    block_data = f.stream.read(f.block_size)
    zero1, zero2, file_id, size = struct.unpack("<LLLL", block_data[:0x10])
    assert zero1 == 0
    assert zero2 == 0
    print("%08X %08X" % (file_id, size))
    file_data = f.stream.read(size)
    dump(file_data[0x10:])
Example #24
def test_formbot():
    model_directory = "examples/formbot/models/nlu/current"
    interpreter = Interpreter.load(model_directory)
    text = "uh yes"
    result = interpreter.parse(text)
    utils.dump(result)

    text = "what about chinese food"
    result = interpreter.parse(text)
    utils.dump(result)
Example #25
    def load(self):
        # Load cards
        self.cards = load(self.filename("cards")) or []
        self.ui.cards = self.cards[:]

        # Load secret key
        self.secret_key = load(
            self.filename("secret_key")) or generate_secret_key()
        self.secret_key = int(self.secret_key)
        dump(self.filename("secret_key"), self.secret_key)
Example #26
def dump_image_file(entry):
    j, unk1, file_id, offset, size1, timestamp, version, size2, unk2 = entry

    f.stream.seek(offset)
    j, k, l, m, n = struct.unpack("<LLLHH", f.stream.read(0x10))

    assert j == 0
    assert k == 0

    if m == 0xDA78:
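        # 0x78 0xDA is the usual zlib stream header; read as a little-endian
        # uint16 it shows up here as 0xDA78.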
        assert unk1 % 0x100 == 0x03
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[12:]
        content = zlib.decompress(data)
        assert l == len(content)
        header_id, unk1, width, height, unk2, lngth = struct.unpack(
            "<LLLLLL", content[:24])
        assert lngth + 24 == l
    else:
        assert unk1 % 0x100 == 0x02
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[8:]
        header_id, unk1, width, height, unk2, lngth = struct.unpack(
            "<LLLLLL", data[:24])
        assert lngth + 24 == size1
        content = data

    print hex(file_id)
    if unk2 == 0x15:
        assert width * height * 4 == lngth
        image_0x15(header_id, width, height, content[24:])
    elif unk2 == 0x14:
        assert width * height * 3 == lngth
        image_0x14(header_id, width, height, content[24:])
    elif unk2 == 0x31545844:  # DXT1
        assert width * height == lngth * 2
        image_0x31545844(header_id, width, height, content[24:])
    elif unk2 == 0x33545844:  # DXT3
        assert width * height == lngth
        image_0x33545844(header_id, width, height, content[24:])
    elif unk2 == 0x35545844:  # DXT5
        assert width * height == lngth
        image_0x35545844(header_id, width, height, content[24:])
    elif unk2 == 0x1C:
        assert width * height == lngth
        image_0x1C(header_id, width, height, content[24:])
    elif unk2 == 0x1F4:
        image_0x1F4(header_id, content[24:])
    else:
        print "%08X %04X %04X" % (l, m, n)
        print "%08s %08s %08s %08s %08s %08s" % ("file_id", "unk1", "width",
                                                 "height", "unk2", "lngth")
        print "%08X %08X %08X %08X %08X %08X" % (header_id, unk1, width,
                                                 height, unk2, lngth)
        dump(content[24:])
Example #27
def params(ctx):
    name = ctx['name']
    build_root = ctx['build_root']

    win_root = f'{build_root}/win'
    win_exe = f'{win_root}/{name}.exe'

    old = {k for k in ctx}
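    # Snapshot of the keys already present, presumably so dump(ctx, old) can
    # report just the entries added below.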
    ctx['win_root'] = win_root
    ctx['win_exe'] = win_exe
    dump(ctx, old)
Example #28
def train_and_validate(model, X, y):
    X_train, X_val, y_train, y_val\
        = train_test_split(X, y, test_size=0.3, random_state=42)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print(classification_report(y_val, y_pred))

    model.fit(X, y)
    u.dump(model, c.PATH_MODEL)
    return model
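For context, one way the helper above might be called (a sketch only; the estimator and the synthetic data are assumptions, and the function still relies on the project's u and c modules to save the fitted model):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

# Synthetic stand-in for the (X, y) pair produced by a vectorizing step such
# as the preprocess() shown in Example #10.
X, y = make_classification(n_samples=500, n_features=20, random_state=42)
model = train_and_validate(LogisticRegression(max_iter=1000), X, y)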
Example #29
async def add_lines(lines: List[c.Line]):
    print(lines)
    extra_lines = []
    for line in lines:
        extra_lines.append(line.dict())

    global df
    print(df.shape)
    df = pd.concat([df, pd.DataFrame(extra_lines)], axis=0, ignore_index=True)
    print(df.shape)
    u.dump(df, c.PATH_TRAIN_EXTRA)
    return lines
Example #30
def eval(test_loader, model, device):
    mse = torch.nn.MSELoss()
    mae = torch.nn.L1Loss()

    with torch.no_grad():
        rmse_loss = 0
        mae_loss = 0
        mre_loss = 0
        delta1_loss = 0
        delta2_loss = 0
        delta3_loss = 0
        for i, (img, depth) in enumerate(test_loader):
            img, depth = img.to(device).float(), depth.to(device).float()
            depth = depth.unsqueeze(1)
            output = model(img)

            valid_mask = depth > 0
            depth = depth[valid_mask]
            output = output[valid_mask]

            rmse_loss += mse(output, depth) * test_loader.batch_size
            mae_loss += mae(output, depth) * test_loader.batch_size
            mre_loss += mre(output, depth) * test_loader.batch_size
            delta1_loss += delta(output, depth, 1) * test_loader.batch_size
            delta2_loss += delta(output, depth, 2) * test_loader.batch_size
            delta3_loss += delta(output, depth, 3) * test_loader.batch_size

            image, depth_gt, depth_pred = resize_image_depth(
                img, depth, output)

            _, error_map = make_error_map(image, depth_gt.T, depth_pred.T)
            dump(image=image,
                 depth=depth_pred.T,
                 depth_gt=depth_gt.T,
                 error_map=error_map,
                 prefix='eval',
                 n=i)

        N = len(test_loader) * test_loader.batch_size
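        # Assumes every batch holds exactly test_loader.batch_size samples; a
        # final partial batch would make N a slight overcount.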
        rmse_loss = torch.sqrt(rmse_loss/N)
        mae_loss = mae_loss / N
        mre_loss = mre_loss / N
        delta1_loss = delta1_loss / N
        delta2_loss = delta2_loss / N
        delta3_loss = delta3_loss / N

        print('RMSE: %f' % (rmse_loss,))
        print('MAE: %f' % (mae_loss,))
        print('MRE: %f' % (mre_loss,))
        print('Delta1: %f' % (delta1_loss,))
        print('Delta2: %f' % (delta2_loss,))
        print('Delta3: %f' % (delta3_loss,))
Example #31
def params(ctx):
    name = ctx['name']
    build_root = ctx['build_root']

    osx_root = f'{build_root}/osx'
    osx_dmg = f'{osx_root}/{name}.dmg'
    osx_app = f'{osx_root}/{name}.app'

    old = {k for k in ctx}
    ctx['osx_root'] = osx_root
    ctx['osx_dmg'] = osx_dmg
    ctx['osx_app'] = osx_app
    dump(ctx, old)
Example #32
def add_steam_appid_txt(ctx):
    """Add steam_appid.txt for testing."""

    win_root = ctx['win_root']
    steam_appid = ctx['steam_appid']

    win_steam_appid_txt = f'{win_root}/steam_appid.txt'
    with open(win_steam_appid_txt, 'wt') as f:
        f.write(f'{steam_appid}')

    old = {k for k in ctx}
    ctx['win_steam_appid_txt'] = win_steam_appid_txt
    dump(ctx, old)
Example #33
def add_steam_appid_txt(ctx):
    """Add steam_appid.txt for testing."""

    osx_app = ctx['osx_app']
    steam_appid = ctx['steam_appid']

    osx_steam_appid_txt = f'{osx_app}/Contents/MacOS/steam_appid.txt'
    with open(osx_steam_appid_txt, 'wt') as f:
        f.write(f'{steam_appid}')

    old = {k for k in ctx}
    ctx['osx_steam_appid_txt'] = osx_steam_appid_txt
    dump(ctx, old)
Example #34
def dump_image_file(entry):
    j, unk1, file_id, offset, size1, timestamp, version, size2, unk2 = entry

    f.stream.seek(offset)
    j, k, l, m, n = struct.unpack("<LLLHH", f.stream.read(0x10))

    assert j == 0
    assert k == 0

    if m == 0xDA78:
        assert unk1 % 0x100 == 0x03
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[12:]
        content = zlib.decompress(data)
        assert l == len(content)
        header_id, unk1, width, height, unk2, lngth = struct.unpack("<LLLLLL", content[:24])
        assert lngth + 24 == l
    else:
        assert unk1 % 0x100 == 0x02
        f.stream.seek(offset)
        data = f.stream.read(size1 + 0x08)[8:]
        header_id, unk1, width, height, unk2, lngth = struct.unpack("<LLLLLL", data[:24])
        assert lngth + 24 == size1
        content = data

    print hex(file_id)
    if unk2 == 0x15:
        assert width * height * 4 == lngth
        image_0x15(header_id, width, height, content[24:])
    elif unk2 == 0x14:
        assert width * height * 3 == lngth
        image_0x14(header_id, width, height, content[24:])
    elif unk2 == 0x31545844:  # DXT1
        assert width * height == lngth * 2
        image_0x31545844(header_id, width, height, content[24:])
    elif unk2 == 0x33545844:  # DXT3
        assert width * height == lngth
        image_0x33545844(header_id, width, height, content[24:])
    elif unk2 == 0x35545844:  # DXT5
        assert width * height == lngth
        image_0x35545844(header_id, width, height, content[24:])
    elif unk2 == 0x1C:
        assert width * height == lngth
        image_0x1C(header_id, width, height, content[24:])
    elif unk2 == 0x1F4:
        image_0x1F4(header_id, content[24:])
    else:
        print "%08X %04X %04X" % (l, m, n)
        print "%08s %08s %08s %08s %08s %08s" % ("file_id", "unk1", "width", "height", "unk2", "lngth")
        print "%08X %08X %08X %08X %08X %08X" % (header_id, unk1, width, height, unk2, lngth)
        dump(content[24:])
Example #35
    def train(self, train_set: pd.DataFrame, force: bool = False, save: bool = True) -> None:
        if not force and self.LogReg_pipeline is not None:
            return

        self.LogReg_pipeline = Pipeline([
            ('tfidf', TfidfVectorizer(stop_words=self.stop_words)),
            ('clf', LogisticRegression(solver='sag')),
        ])

        self.LogReg_pipeline.fit(train_set['comment_text'].map(lambda com: utils.preprocess_text(com)),
                                 train_set['bannable'])

        if save:
            utils.dump(self.LogReg_pipeline, "log_pipeline")
Example #36
def predict(word_preds,
            stop_words=(),
            k=None,
            t='fr',
            query=False,
            lang=False,
            save_pred=None,
            load_pred=None):

    if load_pred:
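        # A previously saved prediction file short-circuits the cleaning pass below.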
        word_preds = load(load_pred)

    else:
        word_preds = list(map(list, word_preds))

        for i in range(len(word_preds)):
            preds = list(filter(has_letter, map(clean, word_preds[i][1][:k])))

            if stop_words:
                preds = [w for w in preds if w not in stop_words]

            if query:  # filter src word
                clean_q = clean(word_preds[i][0])
                preds = [p for p in preds if p != clean_q]

            if lang:  # filter by target language
                preds = list(filter(is_lang(t), preds))

            word_preds[i][1] = preds

    if save_pred:
        dump(word_preds, save_pred)

    pred_words_matrix = [
        list(preds_to_words(wrd_pred, stop_words))
        for _, wrd_pred in word_preds
    ]
    # src-word i -> (tfidf-word scores, tf-idf ngram scores)
    tfidf_results = tfidf_word_feats(pred_words_matrix)

    for (wrd, wrd_pred), (tfidf_words, tfidf_ngs) in \
            zip(word_preds, tfidf_results):
        words_w_ranks = list(preds_to_words(wrd_pred, stop_words, rank=True))
        yield word_row_feats(
            wrd,
            words_w_ranks,
            t,
            tfidf_words=tfidf_words,
            tfidf_ngrams=tfidf_ngs,
        )
Example #37
def user():
    user = {}
    with open("dataset/yelp_academic_dataset_user.json", "r") as f:
        for line in f:
            line = json.loads(line)
            user_id = line['user_id']
            user[user_id] = {}
            # user[user_id]['review_cnt'] = line['review_count']
            user[user_id]['yelp_since'] = line['yelping_since']
            # user[user_id]['friends_cnt'] = len(line['friends'])
            # user[user_id]['fans'] = line['fans']
            # user[user_id]['elite_year_cnt'] = len(line['elite'])
            user[user_id]['elite'] = line['elite']
            user[user_id]['avg_stars'] = line['average_stars']
    utils.dump(user, "dicts/user.p")
Example #38
    def insert(cls, *instances):
        """ Insert fact instances (overridden to handle Dimensions correctly)
        """
        if instances:
            columns = [column for column in cls.__columns__
                       if not isinstance(column, AutoColumn)]
            sql = "%s INTO %s (\n  %s\n)\n" % (
                cls.INSERT, escaped(cls.__tablename__),
                ",\n  ".join(escaped(column.name) for column in columns))

            batches = cls.batch(instances)
            for iteration, batch in enumerate(batches, start=1):
                log.debug('Inserting batch %s' % (iteration),
                          extra={"table": cls.__tablename__})

                insert_statement = sql
                link = "VALUES"
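                # The first row is prefixed with VALUES and every later row with
                # a comma, building one multi-row INSERT statement.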

                for instance in batch:
                    values = []
                    for column in columns:
                        value = instance[column.name]
                        if isinstance(column, DimensionKey):
                            if not value and column.optional:
                                values.append(dump(value))
                            else:
                                values.append(
                                    "(%s)" % column.dimension.__subquery__(
                                        value,
                                        instance.__dimension_selector__.timestamp(instance) # TODO This is a bit messy - shouldn't have to pass the instance back in.
                                        )
                                    )
                        else:
                            values.append(dump(value))
                    insert_statement += link + (" (\n  %s\n)" % ",\n  ".join(values))
                    link = ","

                connection = Warehouse.get()
                try:
                    with closing(connection.cursor()) as cursor:
                        cursor.execute(insert_statement)
                except Exception as e:
                    classify_error(e)
                    log.error(e)
                    log.error(insert_statement)
                    connection.rollback()
                else:
                    connection.commit()
Example #39
    def search_list(self, page=1, page_size=10, query_params=None, sort_params=None, pager_flag=True):
        query_params = query_params or {}
        if not sort_params:
            sort_params = {"add_time": -1}
        coll = self.get_coll()
        if pager_flag:
            length = coll.find(query_params).count()
            pager = utils.count_page(length, page, page_size)
            cr = coll.aggregate([
                {"$match": query_params},
                {"$sort": sort_params},
                {"$skip": pager['skip']},
                {"$limit": pager['page_size']}])
        else:
            pager = utils.count_page(0, page, page_size)
            cr = coll.aggregate([
                {"$match": query_params},
                {"$sort": sort_params}])
        objs = []
        for obj in cr:
            obj = utils.dump(obj)
            try:
                for extra_param in self.extra_params:
                    exec("""obj[extra_param] = self.get_%s(obj)"""%extra_param)
                objs.append(obj)
            except:
                objs.append(obj)

        return objs, pager
Example #40
 def prepare_fullha(config, ha_file):
     """ Prepare user.full_ha.file """
     conf = yaml.load(ha_file)
     net_ip = ".".join((config['servers']['control-servers'][0]['ip'].split(".")[:3]))
     vipc = net_ip + ".253"
     conf["coe::base::controller_hostname"] = "control-server"
     conf["horizon::keystone_url"] = change_ip_to(conf["horizon::keystone_url"], vipc)
     conf["controller_names"] = [c["hostname"] for c in config['servers']['control-servers']]
     conf["openstack-ha::load-balancer::controller_ipaddresses"] = [c["ip"]
                                                                    for c in config['servers']['control-servers']]
     conf["openstack-ha::load-balancer::swift_proxy_ipaddresses"] = [c["ip"]
                                                                    for c in config['servers']['swift-proxy']]
     conf["openstack-ha::load-balancer::swift_proxy_names"] = [c["hostname"]
                                                                    for c in config['servers']['swift-proxy']]
     vipsw = net_ip + ".252"
     conf["openstack::swift::proxy::swift_proxy_net_ip"] = "%{ipaddress_eth2}"
     conf["openstack::swift::proxy::swift_memcache_servers"] = [i["ip"] + ":11211"
                                                                for i in config['servers']['swift-proxy']]
     conf["nova::memcached_servers"] = [i["ip"] + ":11211" for i in config['servers']['control-servers']]
     conf["rabbit_hosts"] = [i["hostname"] + ":5672" for i in config['servers']['control-servers']]
     conf["galera::galera_servers"] = [c["ip"] for c in config['servers']['control-servers']]
     conf["galera::galera_master"] = config['servers']['control-servers'][0]["hostname"] + "." + DOMAIN_NAME
     conf["galera_master_name"] = config['servers']['control-servers'][0]["hostname"]
     conf["galera_master_ipaddress"] = config['servers']['control-servers'][0]["ip"]
     conf["galera_backup_names"] = [i["hostname"] for i in config['servers']['control-servers'][1:]]
     conf["galera_backup_ipaddresses"] = [i["ip"] for i in config['servers']['control-servers'][1:]]
     conf["openstack::swift::storage-node::storage_devices"] = ["vdb", "vdc", "vdd"]
     return dump(conf)
Example #41
 def create(self, **obj):
     coll = self.get_coll()
     curr_time = datetime.datetime.now()
     obj = json.loads(json.dumps(obj))
     obj["add_time"] = str(curr_time)
     coll.insert_one(obj)
     return utils.dump(obj)
Example #42
    def __subquery__(cls, value, timestamp):
        """ Return a SQL SELECT query to use as a subquery within a
        fact INSERT. Does not append parentheses or a LIMIT clause.
        """
        value_type = type(value)
        # We also check for subclasses for situations like basestring, which
        # matches on either str or unicode.
        natural_keys = [key for key in cls.__naturalkeys__
                        if (key.type is value_type or
                            issubclass(value_type, key.type))]
        if not natural_keys:
            raise ValueError("Value type '%s' does not match type of any "
                             "natural key for dimension "
                             "'%s'" % (value_type.__name__, cls.__name__))

        sql_template = (
            'SELECT {primary_key} FROM {table_name} '
            'WHERE {selector} '
            'AND `applicable_from` = (SELECT max(`applicable_from`) '
            'FROM {table_name} '
            'WHERE {selector} AND `applicable_from` <= "{timestamp}")'
            )
        sql = sql_template.format(
            primary_key=escaped(cls.__primarykey__.name),
            table_name=escaped(cls.__tablename__),
            selector=" OR ".join("%s = %s" % (escaped(key.name), dump(value)) for key in natural_keys),
            timestamp=timestamp
            )
        return sql
Example #43
def erb():
    working_dir = get_working_dir()
    log.setup(logging.DEBUG, path=working_dir)

    window_size = default_input('Window size', 3)
    n_nodes = default_input('N Nodes', 100)
    connectivity = default_input('Connectivity', 2)
    f = default_input('From', 0)
    t = default_input('To', n_nodes + 1)
    s = default_input('Step', n_nodes / 10)
    r = range(f, t, s)

    distribution = estimate_reservoir_distribution(
        30, n_nodes, connectivity, r, window_size)

    name = '[NN:{}-WS:{}-K:{}]-distribution'.format(n_nodes, window_size, connectivity)
    dump(distribution, name, folder=working_dir)
Example #44
 def update(self, query_params, update_params):
     coll = self.get_coll()
     obj = coll.find_one(query_params)
     if obj:
         obj.update(update_params)
         ret = coll.save(obj)
     else:
         obj = {}
     return utils.dump(obj)
Example #45
def affective(video_path, video_file, output_path):
  audio_path = output_path
  audio_file = video_file + ".wav"
  
  ret = extract_audio(video_path, video_file, audio_path, audio_file)
  if ret == 0:
    # normal return
    A, a = arousal.get_arousal(video_path, video_file, audio_path, audio_file)
    v = valence.get_valence(audio_path, audio_file, A, a)
    
    utils.dump(output_path, video_file + "_arousal.txt", a)
    utils.dump(output_path, video_file + "_valence.txt", v)
    utils.dump2(output_path + "/final/", video_file + "_final.txt" , v, a)
    plot_data.plot_data_pyplot(output_path + "/final/" + video_file + "_final.txt", v, a)
  try:
    os.remove(audio_path + audio_file)
  except OSError:
    print "Warning: file not removed"
  print "Finished"
Example #46
 def search(self, query_params):
     coll = self.get_coll()
     obj = coll.find_one(query_params)
     obj = utils.dump(obj)
     try:
         for extra_param in self.extra_params:
             exec ("""obj[extra_param] = self.get_%s(obj)""" % extra_param)
     except:
         pass
     return obj
Example #47
 def expression(self):
     s = [escaped(self.name), self.type_expression]
     if not self.optional:
         s.append("NOT NULL")
     default_expression = self.default_clause
     if default_expression:
         s.append(default_expression)
     if self.comment:
         s.append("COMMENT %s" % dump(self.comment))
     return " ".join(s)
Example #48
    async def send_dispatch_event(self, event_type, guild, before=None,
                                  after=None):
        e = dict(ts=time(),
                 type=event_type,
                 producer=str(self),
                 guild=dump(guild))

        if before:
            if after:
                e['before'] = dump(before)
                e['after'] = dump(after)
            else:
                e['data'] = dump(before)

        self.log("{event}:{gid} @ {ts}".format(event=e['type'],
                                               gid=e['guild']['id'],
                                               ts=e['ts']))

        await self.send('discord.events.{}'.format(e['type']), e)
Example #49
def view_or_basicauth(view, request, *args, **kwargs):
    # Check for valid basic auth header
    utils.dump(request)
    if 'HTTP_AUTHORIZATION' in request.META:
        auth = request.META['HTTP_AUTHORIZATION'].split()
        if len(auth) == 2:
            if auth[0].lower() == "basic":
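                # Note: under Python 3, b64decode() returns bytes, so a .decode()
                # would be needed before splitting on ':'.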
                uname, passwd = base64.b64decode(auth[1]).split(':')
                user = authenticate(username=uname, password=passwd)
                if user is not None and user.is_active:
                    request.user = user
                    return view(request, *args, **kwargs)

    # Either they did not provide an authorization header or
    # something in the authorization attempt failed. Send a 401
    # back to them to ask them to authenticate.
    response = HttpResponse()
    response.status_code = 401
    response['WWW-Authenticate'] = 'Basic realm="%s"' % "wtf"
    return response
Example #50
    def insert(cls, *instances):
        """ Insert one or more instances into the table as records.
        """
        if instances:
            columns = [column for column in cls.__columns__
                       if not isinstance(column, AutoColumn)]

            sql = "%s INTO %s (\n  %s\n)\n" % (
                cls.INSERT, escaped(cls.__tablename__),
                ",\n  ".join(escaped(column.name) for column in columns))

            batches = cls.batch(instances)
            for iteration, batch in enumerate(batches, start=1):
                log.debug('Inserting batch %s' % (iteration),
                          extra={"table": cls.__tablename__})

                insert_statement = sql
                link = "VALUES"

                for instance in batch:
                    values = []
                    for column in columns:
                        value = instance[column.name]
                        values.append(dump(value))
                    insert_statement += link + (" (\n  %s\n)" % ",\n  ".join(values))
                    link = ","

                for i in range(1, 3):
                    connection = Warehouse.get()
                    try:
                        cursor = connection.cursor()
                        cursor.execute(insert_statement)
                        cursor.close()

                    except Exception as e:
                        classify_error(e)
                        if e.__class__ == BrokenPipeError and i == 1:
                            log.info(
                                'Trying once more with a fresh connection',
                                extra={"table": cls.__tablename__}
                                )
                            connection.close()
                        else:
                            log.error(e)
                            return
                    else:
                        connection.commit()
                        break

        log.debug('Finished updating %s' % cls.__tablename__,
                  extra={"table": cls.__tablename__})
Example #51
def _main(opts, args):
    level = logging.DEBUG if opts.debug else logging.INFO
    logging.basicConfig(format=FORMAT, level=level)

    if opts.quiet:
        logging.disable(logging.CRITICAL)

    logging.info('Imagy started')
    logging.debug(map(str, (args, opts)))

    if not opts.memorystore:
        store_path = opts.store_path
        if store_path is None:
            store_path = imagy_at_home = path('~').expanduser().joinpath(IMAGY_DIR_NAME)
            snippet = (' and backup files' if config.KEEP_ORIGINALS else '')
            msg = 'Using %s to store configuration%s, you can modify this path in config.py under STORE_PATH'
            logging.info(msg, imagy_at_home, snippet)
        store.load(store_path)

    args = [path(arg) for arg in args or FILE_PATTERNS if arg]
    run_daemon = opts.run

    if opts.clear: clear()
    elif opts.dump: dump(store)
    elif opts.revert: revert()
    elif opts.list: list_files()
    elif opts.files: do_files(*args)
    elif opts.deloriginals: delete_originals()
    elif opts.version: version()
    else: run_daemon = True

    if run_daemon:
        # if nothing specified so far, just run `smart mode` i.e. initialize the
        # directories and then run the daemon afterwards
        if not opts.no_init:
            initialize(*args)
        if not opts.no_watch:
            watch.watcher.run(*args)
Example #52
 def prepare_role(config, role_file):
     """ Prepare role_mappings file """
     roles = {config["servers"]["build-server"]["hostname"]: "build"}
     for c in config["servers"]["control-servers"]:
         roles[c["hostname"]] = "controller"
     for c in config["servers"]["compute-servers"]:
         roles[c["hostname"]] = "compute"
     for c in config["servers"]["swift-storage"]:
         roles[c["hostname"]] = "swift_storage"
     for c in config["servers"]["swift-proxy"]:
         roles[c["hostname"]] = "swift_proxy"
     for c in config["servers"]["load-balancer"]:
         roles[c["hostname"]] = "load_balancer"
     return dump(roles)
Example #53
def cache(url, path):
	print 'Downloading: %s' % urlparse.urlsplit(url).path
	page = requests.get(url, proxies=PROX)
	if page.status_code == 404:
		return {'url': url, 'uid': 'Error: 404', 'time': gmt_now()}, []
	assert page.status_code == 200, 'Error: Status code error in page download, received %s @ %s' % (page.status_code, urlparse.urlsplit(url).path)
	tree = html.fromstring(page.content)

	links = list_links([str(x) for x in tree.xpath(r'//*[@href]/@href')], url=url)

	uid = dump(html.tostring(tree), path, verbose=True)

	pause()
	return {'url': url, 'uid': uid, 'time': gmt_now()}, links
Example #54
    def __init__(self, output_path, prod_name, wisdom_update = {}):
        """
        Initialize postprocessor with output parameters.

        :param output_path: path where postprocessing files are stored
        :param prod_name: name of manifest json file and prefix of all output files
        :param wisdom_update: an optional dictionary that maps variables to
                              modifications requested in their visualization wisdom
        """
        logging.info("Postprocessor: output_path=%s prod_name=%s" % (output_path, prod_name)) 
        dump(wisdom_update,"Postprocessor: wisdom_update")
        self.output_path = output_path
        self.product_name = prod_name
        self.manifest = {}
        self.wisdom_update = wisdom_update

        # in case the manifest exists, load the existing version
        mf_path = os.path.join(output_path, prod_name + '.json')
        if osp.exists(mf_path):
            self.manifest = json.load(open(mf_path))
            logging.info('postprocessor: Loaded manifest at %s' % mf_path)
            # dump(self.manifest,"postprocessor: manifest")
        else:   
            logging.info('postprocessor: manifest at %s does not exist yet' % mf_path)
Example #55
    def execute(cls, **params):
        database = getattr(cls, "database")
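        # Each parameter is rendered through dump() (presumably into a SQL
        # literal) before being interpolated into the query template.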
        query = getattr(cls, "query").format(**{key: dump(value) for key, value in params.items()})

        with NamedConnection(database) as connection:
            with closing(connection.cursor(dictionary=True)) as cursor:
                cursor.execute(query)
                rows = []
                for row in cursor:
                    # Dump the rows immediately into memory, otherwise
                    # the connection might timeout.
                    rows.append(row)

        for row in rows:
            yield row
Example #56
 def prepare_common(config, common_file):
     """ Prepare user.common.file """
     conf = yaml.load(common_file)
     net_ip = ".".join((config['servers']['control-server'][0]['ip'].split(".")[:3]))
     vipc = net_ip + ".253"
     conf["controller_public_address"] = vipc
     conf["controller_admin_address"] = vipc
     conf["controller_internal_address"] = vipc
     conf["coe::base::controller_hostname"] = "control-server"
     conf["domain_name"] = "domain.name"
     conf["ntp_servers"] = ["ntp.esl.cisco.com"]
     conf["external_interface"] = "eth4"
     conf["nova::compute::vncserver_proxyclient_address"] = "%{ipaddress_eth0}"
     conf["build_node_name"] = "build-server"
     conf["controller_public_url"] = change_ip_to(
         conf["controller_public_url"],
         vipc)
     conf["controller_admin_url"] = change_ip_to(
         conf["controller_admin_url"],
         vipc)
     conf["controller_internal_url"] = change_ip_to(
         conf["controller_internal_url"],
         vipc)
     conf["cobbler_node_ip"] = config['servers']['build-server'][0]['ip']
     conf["node_subnet"] = ".".join(conf["cobbler_node_ip"].split(".")[:3]) + ".0"
     conf["node_gateway"] = ".".join(conf["cobbler_node_ip"].split(".")[:3]) + ".1"
     vipsw = ".".join((config['servers']['control-server'][0]['ip'].split(".")[:3])) + ".252"
     conf["swift_internal_address"] = vipsw
     conf["swift_public_address"] = vipsw
     conf["swift_admin_address"] = vipsw
     conf["swift_proxy_net_ip"] = "%{ipaddress_eth0}"
     conf['mysql::server::override_options']['mysqld']['bind-address'] = "0.0.0.0"
     #    config['servers']['control-server'][0]['ip']
     conf['swift_storage_interface'] = "eth0"
     conf['swift_local_net_ip'] = "%{ipaddress_eth0}"
     conf['internal_ip'] = "%{ipaddress_eth0}"
     conf['public_interface'] = "eth0"
     conf['private_interface'] = "eth0"
     conf['install_drive'] = "/dev/vda"
     conf['mon_initial_members'] = config['servers']['control-server'][0]["hostname"]
     conf['ceph_primary_mon'] = config['servers']['control-server'][0]["hostname"]
     conf['ceph_monitor_address'] = config['servers']['control-server'][0]["ip"]
     conf['ceph_cluster_interface'] = "eth0"
     conf['ceph_cluster_network'] = net_ip + ".0/24"
     conf['ceph_public_interface'] = "eth0"
     conf['ceph_public_network'] = net_ip + ".0/24"
     return dump(conf)
Example #57
def cache_update(obj, path, links=[]):
	# headers = {'If-Modified-Since': obj['time']}
	headers = {}
	print 'Checking: %s' % urlparse.urlsplit(obj['url']).path
	page = requests.get(obj['url'], proxies=PROX, headers=headers)
	if page.status_code == 304:
		print 'No changes made...'
	else:
		assert page.status_code == 200, 'Error: Status code error in page download, received %s @ %s' % (page.status_code, urlparse.urlsplit(obj['url']).path)
		tree = html.fromstring(page.content)

		uid = dump(html.tostring(tree), path, verbose=True)

		links = list_links([str(x) for x in tree.xpath(r'//*[@href]/@href')], url=obj['url'])

	obj['time'] = gmt_now()
	return obj, links
Example #58
    def list(self,query_list,sort_list,use_pager=True,is_origin=False,page=1,page_size=options.page_size):
        length = self.get_coll().find(query_list).count()

        if use_pager:
            pager = utils.count_page(length,page,page_size)
            list = self.get_coll().aggregate([{"$match" : query_list},
                                                   {"$sort":sort_list},
                                                   {"$skip":pager['skip']},
                                                   {"$limit":pager['page_size']}])
        else:
            pager = []
            list = self.get_coll().aggregate([{"$match" : query_list},
                                                   {"$sort":sort_list}
                                                   ])
        if is_origin:
            return list,pager
        else:
            return utils.dump(list),pager
Example #59
def prepare_new_files(config, path, use_sudo_flag):
    """ Prepare hostname specific files in puppet/data/hiera_data/hostname """

    def write(text, path, filename, sudo):
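        # The file is written twice: once under the given name and once with
        # hyphens replaced by underscores.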
        fd = StringIO(text)
        warn_if_fail(put(fd, os.path.join(path, filename), use_sudo=sudo))
        warn_if_fail(put(fd, os.path.join(path, filename.replace("-", "_")), use_sudo=sudo))

    for compute in config["servers"]["compute-servers"]:
        file_name = compute["hostname"] + ".yaml"
        ceph = {}
        ceph["cephdeploy::has_compute"] = True
        ceph["cephdeploy::osdwrapper::disks"] = ["vdb", "vdc", "vdd"]
        write(dump(ceph), path, file_name, use_sudo_flag)
    for num, lb in enumerate(config["servers"]["load-balancer"]):
        if num == 0:
            lb_text = ("openstack-ha::load-balancer::controller_state: MASTER\n"
                       "openstack-ha::load-balancer::swift_proxy_state: BACKUP\n"
            )
        else:
            lb_text = ("openstack-ha::load-balancer::controller_state: BACKUP\n"
                       "openstack-ha::load-balancer::swift_proxy_state: MASTER\n"
            )
        file_name = lb["hostname"] + ".yaml"
        write(lb_text, path, file_name, use_sudo_flag)
    for num, sw in enumerate(config["servers"]["swift-storage"]):
        sw_text = (
            'openstack::swift::storage-node::swift_zone: {num}\n'
            'coe::network::interface::interface_name: "%{{swift_storage_interface}}"\n'
            'coe::network::interface::ipaddress: "%{{swift_local_net_ip}}"\n'
            'coe::network::interface::netmask: "%{{swift_storage_netmask}}"\n'.format(num=num+1)
        )
        file_name = sw["hostname"] + ".yaml"
        write(sw_text, path, file_name, use_sudo_flag)
    file_name = config["servers"]["build-server"]["hostname"] + ".yaml"
    b_text = "apache::default_vhost: true"
    write(b_text, path, file_name, use_sudo_flag)
Example #60
    return word_counter
        
    
"""
    Sample: Instantiate this directly and call whatever you want
"""
if __name__ == "__main__":
    toc = 'C:/data/books/game_of_thrones/A_Game_Of_Thrones_split_001.html'
    
    book = ebup_parser(table_of_contents=toc)
    
    overall = Counter()
    
    for a in range(3, book.num_chapters):
        
        text = book.get_chapter_contents(a)
        current_chapter_words = get_word_counter(text)
        
        overall += current_chapter_words

    all_words = dict(overall.most_common(200))
    
    dump(all_words)