Example #1
def cambiar_version(cambio=1, mostrar=False):
    diff = utils.load_json(DIFF_JSON)
    window = sublime.active_window()
    view = window.active_view()
    filename = view.file_name()
    folder = get_folder(filename)
    actual = diff[filename]
    viejo = actual
    lista = sorted(os.listdir(folder))
    i = lista.index(actual) + cambio
    if i < 0 or i == len(lista):
        return
    actual = lista[i]
    diff[filename] = actual
    utils.save_json(DIFF_JSON, diff)
    if not mostrar:
        with open(folder + os.sep + actual) as archivo:
            utils.set_text(archivo.read())

    print("\n")
    with open(folder + os.sep + actual, 'r') as one:
        with open(folder + os.sep + viejo, 'r') as two:
            diff = difflib.unified_diff(one.readlines(), two.readlines())
            for line in diff:
                line = line.strip()
                if line.startswith("@@ -"):
                    utils.go_line(int(line[4:line.find(",")]) + 3)
                if line.startswith("-") or line.startswith("+") or line.startswith("@@"):
                    print(line.strip() + ":")
    print("\n")
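Note: most examples on this page call load_json/save_json helpers from a project-local utils module that is not shown, and the argument order varies between projects (Example #1 passes the path first; several later examples pass the data first). A minimal sketch of one plausible path-first wrapper, for orientation only:

import json

def load_json(path, default=None):
    # Parse a JSON file; fall back to `default` when the file does not exist.
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return default

def save_json(path, data):
    # Serialize `data` to `path` as UTF-8 JSON.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)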
Example #2
def update_assets(args, luna2d_path, config):
	assets_path = args.project_path + "/.luna2d/assets/"
	compiler_path = luna2d_path + "/tools/luac/luac"

	shutil.rmtree(assets_path, ignore_errors=True)
	os.makedirs(assets_path)

	print("Updating assets..")
	shutil.copytree(args.game_path, assets_path + "/game")

	# Rewrite game config with merged config
	utils.save_json(config, assets_path + "/game/config.luna2d")

	strip_unused_resolutions(assets_path, config)

	print("Compiling scripts..")
	for root, subFolder, files in os.walk(assets_path + "/game/scripts"):
		for item in files:
			filename = os.path.realpath(str(os.path.join(root, item)))
			outFilename = filename + "c"

			subprocess.call([compiler_path,
				"-s",
				"-o",
				outFilename,
				filename])

			os.remove(filename)
			os.rename(outFilename, filename)
Example #3
def find_all_chars(verbose=False):

    char_count = load_json(CHAR_COUNT, {})
    if not char_count:
        train, test, _ = load_data()
        char_count = defaultdict(int)
        S = 0
        for df in (test, train):
            for sentence in df_to_sentences(df):
                S += 1
                for c in sentence:
                    assert len(c) == 1, (c, sentence)
                    char_count[c] += 1
        char_count = {c: n for c, n in char_count.items()}
        save_json(CHAR_COUNT, char_count)
        print('S=%d' % S)

    chars = sorted(char_count, key=lambda c: (-char_count[c], c))
    N = sum(char_count.values())
    print('find_all_chars: %d %r' % (len(chars), ''.join(chars[:100])))
    print('N=%d=%.3fM' % (N, N * 1e-6))

    if verbose:
        tot = 0.0
        for i, c in enumerate(chars[:200]):
            n = char_count[c]
            r = n / N
            tot += r
            print('%4d: %8d %.4f %.3f %4d=%2r' % (i, n, r, tot, ord(c), c))

    return char_count
Example #4
 def format_flot(self):
     labels = ["active", "reference"]
     for label in labels:
         data = []
         for i, d in enumerate(self.data[label]):
             data.append([self.tr[label][i],d])
         self._json["data"] = data
         self._json["label"] = label
         save_json(self.format_filename(label),self._json)
Example #5
def ichinese():
	urls = get_test_urls()
	res = defaultdict(int)
	for url in urls:
		html = get_or_cache(url)
		chs = get_chinese(html)
		for ch in chs:
			res[ch] += 1

	res = '|'.join([a for a, b in sorted(filter(lambda x: x[1] >= 40, res.iteritems()), key=lambda x: -x[1])])
	save_json('chineses.json', res)
Example #6
 def _save(self, min_delta=0):
     if self.text_tokens_len + min_delta < len(self.text_tokens):
         print('_save 1: %7d = %7d + %4d %s' % (len(self.text_tokens),
             self.text_tokens_len, len(self.text_tokens) - self.text_tokens_len,
             self.text_tokens_path))
         save_json(self.text_tokens_path, self.text_tokens)
         self.text_tokens_len = len(self.text_tokens)
     if self.token_vector_len + 2 * min_delta < len(self.token_vector):
         print('_save 2: %7d = %7d + %4d %s' % (len(self.token_vector),
             self.token_vector_len, len(self.token_vector) - self.token_vector_len,
             self.token_vector_path))
         save_pickle(self.token_vector_path, self.token_vector)
         self.token_vector_len = len(self.token_vector)
Example #7
def LoadsListMusic():
    try:
        with open('DATA', encoding='utf-8') as data_json:
            data_token = json.loads(data_json.read())

        access_token = data_token["access_token"]
        refresh_token = data_token["token"]

        isProxyAPI = utils.get_proxy_host(False)

        data = vkapi.get_audio(refresh_token, isProxyAPI)

        if (config.SaveToFile):
            utils.save_json('response.json', data)

        count_track = data['response']['count']
        i = 0

        for count in data['response']['items']:

            line = count['artist'] + ' — ' + count['title']
            print(line)
            # test.setText(0, str(i + 1))
            # test.setText(1, count['artist'])
            # test.setText(2, count['title'])
            # test.setText(3, utils.time_duration(count['duration']))
            # test.setText(4, utils.unix_time_stamp_convert(count['date']))

            # if ('is_hq' in count and 'is_explicit' in count):
            #     test.setText(5, "HQ (E)")
            #
            # elif 'is_hq' in count:
            #     test.setText(5, "HQ")
            #
            # elif 'is_explicit' in count:
            #     test.setText(5, "E")
            #
            # if (count['url'] == ""):
            #     test.setText(6, "Unavailable")

            i += 1

        # self.label.setText("Total tracks: " + str(count_track) + " Selected: " + str(0) + " Downloaded: " + str(0))
        is_loaded = True

    except vkapi.VKException as ex:
        print(str(ex))

    except Exception as e:
        print(str(e))
Example #8
 def download(self, show_id):
     show_id = str(show_id)
     url = self.pattern.format(show_id)
     response = self.request.get(url)
     try:
         json = response.json()
         if json['errno'] == 0:
             print('{}: OK'.format(show_id))
             utils.save_json('{}/{}/index.json'.format(self.dist, show_id),
                             json['data'])
         else:
             print('{}: {}'.format(show_id, json['msg']))
     except (KeyError, ValueError, TypeError, AttributeError) as e:
         print('{}: {}'.format(show_id, response.content))
Example #9
 async def add(self, ctx, playlist, link):
     if playlist not in playlists:
         playlists[playlist] = []
     info = get_yt_info(link)
     if info['_type'] == 'playlist':
         for song in info['entries']:
             playlists[playlist].append(f"http://youtu.be/{song['url']}")
         await ctx.send(f'Added YT playlist {info["title"]} to playlist {playlist}!')
     else:
         playlists[playlist].append(link)
         if playlist == self.current_playlist:
             self.playlist.append(link)
         await ctx.send(f'Added song {info["title"]} to playlist {playlist}!')
     save_json(playlists, 'playlists.json')
Example #10
def handle_metrics(split, metrics, output_dir):
    """
    Log and save metrics

    Args:
    - split: one of train, val, test
    - metrics: metrics dict
    - output_dir: where to save the metrics
    """

    logger.info(f"***** {split} metrics *****")
    for key in sorted(metrics.keys()):
        logger.info(f"  {key} = {metrics[key]}")
    save_json(metrics, os.path.join(output_dir, f"{split}_results.json"))
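A hedged usage sketch for handle_metrics above, assuming a data-first save_json (as the call above implies) and a configured logger; the directory and metric values are made up:

import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
os.makedirs("outputs/run1", exist_ok=True)

# Logs "***** val metrics *****" plus one line per key (sorted),
# then writes outputs/run1/val_results.json.
handle_metrics("val", {"accuracy": 0.89, "loss": 0.31}, "outputs/run1")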
Example #11
    async def unset(self, ctx):
        """Remove your timezone from the database."""
        if str(ctx.author.id) not in self.time_config:
            await show_error(ctx, "You don't have a timezone set.")

        self.time_config.pop(str(ctx.author.id))
        await self.update_times()
        save_json(paths.TIME_SAVES, self.time_config)

        await ctx.send(embed=make_embed(
            title="Unset timezone",
            description=f"Your timezone is now unset.",
            color=colors.EMBED_SUCCESS,
        ))
Example #12
 def store_data(self):
     print('Storing data into meta folder')
     if os.path.isdir(constants.META_FOLDER):
         shutil.rmtree(constants.META_FOLDER)
     meta_summary = PluginMetaSummary()
     meta_summary.plugin_amount = len(self)
     meta_summary.plugins = {}
     for plugin in self:
         plugin.save_meta()
         plugin.save_release_info()
         meta_summary.plugins[plugin.id] = plugin.meta_info
     utils.save_json(meta_summary.serialize(),
                     os.path.join(constants.META_FOLDER, 'plugins.json'),
                     compact=True)
Example #13
def create_summoner_json(cdragon_language, ddragon_language, path):
    """
    Creates DDragon summoner.json
    Highly relies on DDragon, could be improved upon but they don't change often
    """
    cdragon_summoners = download.download_versioned_cdragon_summoner_spells(
        cdragon_language)
    ddragon_summoners = download.download_versioned_ddragon_summoner_spells(
        ddragon_language)
    summoners = {
        "type": "summoner",
        "version": settings.patch['json'],
        "data": {},
    }
    for x in (x for x in cdragon_summoners if x['name'] != ""):
        try:
            ddragon_spell = get_ddragon_id(x['id'], ddragon_summoners)
            summoners['data'].update({
                ddragon_spell['id']: {
                    "id": ddragon_spell['id'],
                    "name": x['name'],
                    "description": x['description'],
                    "tooltip": ddragon_spell['tooltip'],
                    "maxrank": 1,
                    "cooldown": [x['cooldown']],
                    "cooldownBurn": str(x['cooldown']),
                    "datavalues": {},
                    "effect": ddragon_spell['effect'],
                    "effectBurn": ddragon_spell['effectBurn'],
                    "vars": ddragon_spell['vars'],
                    "key": str(x['id']),
                    "summonerLevel": x['summonerLevel'],
                    "modes": x['gameModes'],
                    "costType": ddragon_spell['costType'],
                    "maxammo": ddragon_spell['maxammo'],
                    "range": ddragon_spell['range'],
                    "rangeBurn": ddragon_spell['rangeBurn'],
                    "image": {
                        "full": ddragon_spell['id'] + ".png"
                    },
                    "resource": ddragon_spell['resource'],
                }
            })
        except Exception as ex:
            print(str(ex) + " Failure on Summoner Spell: " + x['name'])
            continue

    utils.save_json(summoners, os.path.join(path, "summoner.json"))
    return summoners
Example #14
def main(args):
    if not args.json_paths:
        raise ValueError("must provide at least one json path")
    data = list(map(utils.load_yaml, args.json_paths))
    kwargs = dict()
    if args.key_mode == "intersection":
        kwargs["union"] = False
    elif args.key_mode == "union":
        kwargs["union"] = True
    else:
        raise ValueError(f"unsupported key mode: {args.key_mode}")
    kwargs["outlier_threshold"] = args.outlier_threshold
    kwargs["pad"] = args.pad
    agg = reduce_json(data, **kwargs)
    utils.save_json(agg, args.save_path or sys.stdout)
Example #15
    async def check_message(self, message):
        if message.author.bot or message.channel != self.bot.get_channel(
                channels.R9K_CHANNEL):
            return

        stripped_content = "".join([
            x for x in unidecode(message.content.strip().casefold())
            if x in string.ascii_letters + string.digits
        ])

        if stripped_content in self.messages:
            await message.delete()
        else:
            self.messages.add(stripped_content)
            save_json(paths.R9K_SAVES, list(self.messages))
Example #16
    def format_json(self, fname=None):

        out = {}
        out['meta'] = self.meta.format_json()
        out['description'] = self.description
        out['exercise_files'] = self.exercise_files
        out['table_of_content'] = (self.table_of_content.format_json()
                                   if self.table_of_content else None)
        out['transcript'] = (self.transcript.format_json()
                             if self.transcript else None)

        if fname:
            utils.save_json(out, fname)

        return out
Example #17
    def get_channels(self, savepath):
        url_data = {}
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        url_data['name'] = "Channels"
        url_data['version'] = 'v2.0'
        url_data['suburl'] = 'channels'
        url_data['method'] = 'GET'
        paramstr = ''
        querystr = ''

        data = self.api_list(url_data, paramstr, querystr)
        file_path = os.path.join(savepath, "data",
                                 "kt_channels_{}.json".format(timestamp))
        save_json(file_path, data)
        return data
Example #18
def mas_main(root):
    result = {}
    for apk_path in yield_app_paths(root, print_path=False):
        try:
            apk_path = os.path.join(root, apk_path)
            pred_score, tc = main(apk_path)
            result[apk_path] = {
                "predict": "malware" if pred_score >= 0.5 else "benign",
                "score": pred_score,
                "time_cost": tc
            }
        except Exception:
            pass
    print(result)
    save_json(result, os.path.join(root, "result.json"))
Example #19
def prepare_image_data():
    meta_file_data = []
    with os.scandir('output/parts') as it:
        for entry in it:
            if entry.name.endswith('.json'):
                part_file = load_json(entry.path)
                for index, image in enumerate(part_file['images']):
                    meta_file_data.append({
                        'url': image,
                        'file_name': entry.name.replace('.json', '') + ' - ' + str(index + 1)
                    })
    save_json(meta_file_data, 'images.json')
Example #20
    def save_readable_batch(
            self, batch: Dict[str, torch.Tensor]) -> Dict[str, List[str]]:
        """A debugging utility"""
        readable_batch = {
            k: self.tokenizer.batch_decode(v.tolist())
            if "mask" not in k else v.shape
            for k, v in batch.items()
        }
        save_json(readable_batch, Path(self.output_dir) / "text_batch.json")
        save_json({k: v.tolist()
                   for k, v in batch.items()},
                  Path(self.output_dir) / "tok_batch.json")

        self.already_saved_batch = True
        return readable_batch
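The dict comprehension above decodes every tensor in the batch except mask entries, which are reduced to their shapes. A standalone sketch of that decode-except-masks pattern, assuming a Hugging Face tokenizer (the model name and token ids here are illustrative):

import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")  # assumed model
batch = {
    "input_ids": torch.tensor([[101, 7592, 2088, 102]]),   # "[CLS] hello world [SEP]"
    "attention_mask": torch.ones(1, 4, dtype=torch.long),
}
readable = {
    k: tokenizer.batch_decode(v.tolist()) if "mask" not in k else v.shape
    for k, v in batch.items()
}
# readable["input_ids"] -> ["[CLS] hello world [SEP]"]
# readable["attention_mask"] -> torch.Size([1, 4])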
Example #21
def get_embeddings(dataset,
                   save_file,
                   pretrained_model=None,
                   random_patches=False):
    torch.cuda.empty_cache()
    torch.manual_seed(0)
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=1,
        shuffle=False,
    )
    print('dataloader len: ', len(dataloader))

    if pretrained_model is not None:
        embedder = codedbert_embedder.from_pretrained(
            pretrained_model, output_hidden_states=True, return_dict=True)
    else:
        embedder = codedbert_embedder.from_pretrained(
            'bert-base-uncased', output_hidden_states=True, return_dict=True)

    embedder.to(device)
    embedder.eval()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    codedbert_embeds = dict()
    with torch.no_grad():
        for i, (patches, _, input_ids, attention_mask, _, _,
                img_name) in enumerate(tqdm(dataloader)):
            input_ids = input_ids.to(device)
            patches = patches.to(device)

            inputs = input_ids.squeeze(0).detach().tolist()
            seq = tokenizer.convert_ids_to_tokens(inputs)
            seq = tokenizer.convert_tokens_to_string(seq)
            embeds = construct_bert_input(patches, input_ids, embedder, device,
                                          random_patches)
            attention_mask = F.pad(attention_mask,
                                   (0, embeds.shape[1] - input_ids.shape[1]),
                                   value=1)
            text_emb, img_emb = embedder.embed(embeds, attention_mask)

            codedbert_embeds[img_name[0]] = {
                'text': seq,
                'text_emb': text_emb.tolist(),
                'img_emb': img_emb.tolist()
            }

    save_json(save_file, codedbert_embeds)
Example #22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='configs/config.json')
    parser.add_argument('--no-cuda', action='store_true')
    parser.add_argument('--parallel', action='store_true')
    args = parser.parse_args()
    args.cuda = torch.cuda.is_available() and not args.no_cuda
    print(args)

    device = torch.device('cuda' if args.cuda else 'cpu')

    config = load_json(args.config)

    model = MNISTNet()
    if args.parallel:
        model = nn.DataParallel(model)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), **config['adam'])
    scheduler = optim.lr_scheduler.StepLR(optimizer, **config['steplr'])

    train_loader, valid_loader = mnist_loader(**config['dataset'])

    trainer = Trainer(model, optimizer, train_loader, valid_loader, device)

    output_dir = os.path.join(config['output_dir'],
                              datetime.now().strftime('%Y%m%d_%H%M%S'))
    os.makedirs(output_dir, exist_ok=True)

    # save config to output dir
    save_json(config, os.path.join(output_dir, 'config.json'))

    for epoch in range(config['epochs']):
        scheduler.step()

        train_loss, train_acc = trainer.train()
        valid_loss, valid_acc = trainer.validate()

        print(
            'epoch: {}/{},'.format(epoch + 1, config['epochs']),
            'train loss: {:.4f}, train acc: {:.2f}%,'.format(
                train_loss, train_acc * 100),
            'valid loss: {:.4f}, valid acc: {:.2f}%'.format(
                valid_loss, valid_acc * 100))

        torch.save(
            model.state_dict(),
            os.path.join(output_dir, 'model_{:04d}.pt'.format(epoch + 1)))
Example #23
    def __init__(self, split=None, transform_function=None):

        self.path = path = "//mnt/datasets/public/issam/VOCdevkit/VOC2007/"
        self.transform_function = transform_function

        fname_path = "%s/ImageSets/Main" % path
        path_pointJSON = "%s/pointDict.json" % path

        if split == "train":
            self.imgNames = [
                t.replace("\n", "")
                for t in ut.read_text(fname_path + "/train.txt")
            ]

        elif split == "val":
            self.imgNames = [
                t.replace("\n", "")
                for t in ut.read_text(fname_path + "/val.txt")
            ]
        elif split == "test":
            self.imgNames = [
                t.replace("\n", "")
                for t in ut.read_text(fname_path + "/test.txt")
            ]

        if os.path.exists(path_pointJSON):
            self.pointsJSON = ut.load_json(path_pointJSON)
        else:
            pointDict = get_pointDict(path, self.imgNames)
            ut.save_json(path_pointJSON, pointDict)

        # for j, key in enumerate(pointDict):
        #   print(j)
        #   pList1 = pointDict[key]
        #   pList2 = self.pointsJSON[key]

        #   for p1 in pList1:
        #     y, x = p1["y"], p1["x"]
        #     flag = False
        #     for p2 in pList2:
        #       y2, x2 = p2["y"], p2["x"]
        #       if y == y2 and x == x2:
        #         flag = True
        #         break
        #     assert flag == True

        self.split = split
        self.n_classes = 21
Example #24
def handle_submit():
    """
    Save tagged labels to JSON file
    """
    labels = [int(label) for label in request.form.getlist('labels')]
    user_id = session.get('email', '')
    if PMIDS_PATH.lower().endswith('.txt'):
        paper_id = request.form['paper_id']
        data = parse_pubmed_xml(paper_id)
        sentences = sent_tokenize(data['abstract'])
        data.update({
            'paper_id': paper_id,
            'user_id': user_id,
            'sentences': sentences,
            'labels': [int(s in labels) for s in np.arange(len(sentences))]
        })
    elif PMIDS_PATH.lower().endswith('.json'):
        paper_id = request.form['paper_id']
        data = pmids_json_map[int(paper_id)]
        data['paper_id'] = str(data['paper_id'])
        data.update({
            'user_id': user_id,
            'labels': [int(s in labels) for s in np.arange(len(data['sentences']))]
        })
        data.pop('enumerate', None)
        data.pop('zip', None)
        if STORE_DETAILS != 1:
            data.pop('title', None)
            data.pop('abstract', None)
            data.pop('sentences', None)
    # save data
    collected_data = read_json(OUTPUT_PATH)
    collected_data = remove_previous(collected_data, data['user_id'],
                                     data['paper_id'])
    collected_data += [data]
    save_json(collected_data, OUTPUT_PATH)

    pmids_untagged = check_ids(collected_data, session['email'], tagged=False)
    if len(pmids_untagged) > 0:
        return flask.redirect('/paper_id/%s' %
                              np.random.choice(pmids_untagged))
    else:
        return flask.redirect('/')
Example #25
    def get_languages(self, savepath):
        # get all the languages
        url_data = {}
        url_data['name'] = "All Languages"
        url_data['version'] = 'v2.0'
        url_data['suburl'] = 'languages'
        url_data['method'] = 'GET'
        paramstr = ''
        querystr = ''

        data = self.api_list(url_data, paramstr, querystr)
        file_path = os.path.join(savepath, "data", "kt_lauguages.json")
        try:
            save_json(file_path, data)
        except:
            pass
Example #26
def mas_flow_main(root):
    result = {}
    for apk_path in yield_app_paths(root, print_path=False):
        try:
            apk_path = os.path.join(root, apk_path)
            malware_time, score, time_cost = flow_main2(apk_path)
            result[apk_path] = {
                "predict": "malware" if malware_time > 0 else "benign",
                "malware_time": malware_time,
                "score": score,
                "time_cost": time_cost
            }
        except Exception:
            pass
    print(result)
    save_json(result, os.path.join(root, "result.json"))
Example #27
    def fetch_meta_list(self):
        meta_list = []
        brand_list = self.fetch_brand_list()
        print(len(brand_list), brand_list)
        for brand_item in brand_list:
            class_list = self.fetch_class_list(brand_item)
            if len(class_list) == 0:
                class_list = [{'app_class_id': '', 'app_class_name': ''}]
            print(len(class_list), brand_item, class_list)
            for class_item in class_list:
                meta_list.append({
                    'brand_item': brand_item,
                    'class_item': class_item
                })

        save_json(meta_list, 'meta_list.json')
Example #28
    async def remove(self, ctx, event: Event):
        """Remove an event."""
        if ctx.author.id != event.owner:
            await show_error(ctx, "You are not the owner of this event.")

        event_config.pop(event.name.lower())
        saving = {}
        for event in event_config:
            saving[event] = list(event_config[event])
        save_json(paths.EVENT_SAVES, saving)

        await ctx.send(embed=make_embed(
            title="Deleted event",
            description="Your event was removed successfully.",
            color=colors.EMBED_SUCCESS,
        ))
Example #29
def monitor():
    cur_minutes = time.strftime('%M', time.localtime(time.time()))
    print('cur_minutes:' + cur_minutes)
    while True:
        if time.strftime('%M', time.localtime(time.time())) == cur_minutes:
            print("start.....")
            estimated_roi = handler()
            print(estimated_roi)
            d_name = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M').replace(':', "-").replace(' ', "-")
            tool.save_json(
                tool.log_dir + '/blood-status-' + tool.BLOOD_LISTEN_OBJ + '-' +
                d_name + ".log", ads_group_blood)
            time.sleep(60)
        else:
            time.sleep(1)
Example #30
    async def unsubscribe(self, ctx, event: Event):
        """Unsubscribe from an event."""
        if str(ctx.author.id) not in event.members:
            await show_error(ctx, "You're not subscribed to that event.")

        event.members.remove(ctx.author.id)
        saving = {}
        for event in event_config:
            saving[event] = list(event_config[event])
        save_json(paths.EVENT_SAVES, saving)

        await ctx.send(embed=make_embed(
            title="Unsubscribed from event",
            description="You were successfully unsubscribed from that event.",
            color=colors.EMBED_SUCCESS,
        ))
Example #31
def clean():
    args = get_args_clean()
    import os
    full_path = os.path.abspath(args["json_file"])
    folder = os.path.dirname(full_path)
    all_imgs = utils.open_json(args["json_file"])
    filtered = utils.filter_by_type(os.listdir(folder), "_c.jpg")

    clean_json = []
    for img in all_imgs:
        for one in filtered:
            if utils.equal(img["file_name"], one):
                clean_json.append(img)
                break

    utils.save_json(clean_json, args["outfile"])
Example #32
def predict(models):
    test_unass = load_json(TEST_UNASS_PATH)
    testdatafeatures = load_pickle(
        os.path.join(TEST_FEATURE_DIR, 'testdatafeatures-withsetinfo.pkl'))
    title_feature_df = pd.read_pickle(
        os.path.join(TEST_FEATURE_DIR, 'test-title-distance-df.pkl'))
    title_feature = title_feature_df.values

    models_loaded = []
    for model_info in models:
        model = {
            'model': load_pickle(model_info['model']),
            'ss': load_pickle(model_info['ss']),
            'cols': model_info['cols'],
            'score': model_info['score']
        }
        models_loaded.append(model)

    scores = [model_info['score'] for model_info in models_loaded]
    weights = [score / sum(scores) for score in scores]
    weights = np.array(weights).reshape(1, len(models_loaded))
    print(weights)

    submission = defaultdict(list)
    for pid_with_index in tqdm.tqdm(test_unass):
        candidate_aids = testdatafeatures[pid_with_index]['candidate-aids']
        data = testdatafeatures[pid_with_index]['data']
        data_length = len(candidate_aids)
        title_data = title_feature[:data_length]
        title_feature = title_feature[data_length:]
        data = np.concatenate((data, title_data), axis=1)
        default_cols = BASE_COLS + SET_INFO_COLS + TITLE_COLS
        df = pd.DataFrame(data=data, columns=default_cols)

        inner_data = np.zeros((len(candidate_aids), len(models_loaded)))
        for num, model_info in enumerate(models_loaded):
            model = model_info['model']
            ss = model_info['ss']
            data = df[model_info['cols']].values
            data = ss.transform(data)
            output = model.predict_proba(data)
            inner_data[:, num] = output

        final_output = np.sum((inner_data * weights), axis=1)
        predict_author = candidate_aids[np.argmax(final_output)]
        submission[predict_author].append(pid_with_index.split('-')[0])
    save_json(submission, os.path.join(FINAL_DIR, 'result-top3models.json'))
Example #33
def combine_graphs_old(options):
    cl_graph = load_graph(options.classifier_graph)
    ###
    rv_graph = load_graph(options.rnaview_graph)
    mc_graph = load_graph(options.mc_annotate_graph)
    mo_graph = load_graph(options.moderna_graph)
    fr_graph = load_graph(options.fr3d_graph)
    ###
    cl_d = graph_to_doublets(cl_graph, 'MY')
    rv_d = graph_to_doublets(rv_graph)
    mc_d = graph_to_doublets(mc_graph)
    mo_d = graph_to_doublets(mo_graph)
    fr_d = graph_to_doublets(fr_graph)
    ###
    results = {}
    for _id, d in cl_d.items():
        desc = d.get('desc')
        if desc is None:
            desc = ''
        n_type = d.get('n_type')
        exp_res = get_expected_result(rv_d.get(_id), mc_d.get(_id),
                                      mo_d.get(_id), fr_d.get(_id))
        full_id = _id
        if options.pdb_id:
            full_id = options.pdb_id.upper() + ":" + _id
        else:
            full_id = _id
        if exp_res != '' and exp_res[0] != '?':
            # print "reference doublet %s: %s" % (_id,exp_res)
            add_result(results, 'ref-all', full_id, exp_res, n_type)
            if exp_res == desc:
                add_result(results, 'ref-ok', full_id, exp_res, n_type)
            elif ('?' + exp_res) == d['desc']:
                add_result(results, 'ref-ok-fuzzy', full_id, exp_res, n_type)
            elif desc == '':
                add_result(results, 'ref-undetected', full_id, exp_res, n_type)
            else:
                add_result(results, 'ref-diff', full_id, exp_res, n_type)
        elif exp_res != '' and exp_res[0] == '?' and desc != '' and desc[0] == '?':
            exp_results = exp_res[2:len(exp_res) - 1].split("|")
            # print desc[1:], exp_results
            if desc[1:] in exp_results:
                add_result(results, 'fuzzy-ok', full_id, desc[1:], n_type)
        elif exp_res == '' and desc != '' and desc[0] != '?':
            add_result(results, 'prev-undetected', full_id, desc, n_type)
    save_json(options.output_json, results)
Example #34
    def run(self, edit):
        d={}
        modulos=utils.file_read(GO_API_FILE)
        lineas=modulos.splitlines()
        for linea in lineas:
            if linea:
                ocurrencias=re.findall(REGEX_FUNCION, linea, re.IGNORECASE)
                if ocurrencias:
                    paquete=ocurrencias[0][0]
                    if paquete.find("/")!=-1:paquete=paquete[paquete.find("/")+1:]
                    funcion=ocurrencias[0][1]
                    if not d.get(paquete):d[paquete]=[]
                    d[paquete].append(funcion)

        utils.save_json(GO_MAIN_MODULE, d)
        for key in d.keys():
            utils.save_json(GO_MODULES+key+".json", d[key])
Example #35
    def check_redditor(self, stream_source: praw.models.Redditor,
                       submission_stream: Generator):
        for submission in submission_stream:
            if submission is None:
                break
            if submission.subreddit not in self.monitored_subreddits:
                continue

            if submission.allow_live_comments:
                self.monitored_streams["monitored"][submission.id] = None

                utils.save_json(self.config_dir / "monitored_streams.json",
                                self.monitored_streams)
                redditor_name = str(stream_source)
                logger.info(
                    f"{redditor_name} has gone live on {submission.subreddit} at ({submission.shortlink}), notifying {len(self.users['subscribers'])} subscribers, and posting to Discord.",
                )

                if self.webhook is not None:
                    utils.webhook_post(
                        webhook=self.webhook,
                        plain_text_message=", ".join(
                            self.config["announcements_webhook"]["mention"]),
                        embeds=[
                            utils.discord_embed_builder(
                                embed_title=f"u/{redditor_name} has gone live on {submission.subreddit}!",
                                embed_description=f"[{submission.title}]({submission.shortlink})",
                                embed_image=self.config["announcements_webhook"]["image"],
                                author=redditor_name,
                                author_url=f"https://www.reddit.com/u/{redditor_name}",
                            )
                        ],
                    )

                for subscriber in self.users["subscribers"]:
                    self.reddit.redditor(subscriber).message(
                        subject=f"Hi {subscriber}, u/{redditor_name} is live on {submission.subreddit}!",
                        message=f"[{submission.title}]({submission.shortlink})",
                    )
                    logger.debug(
                        f"Sent subscriber u/{subscriber} gone live message.")
Example #36
def get_search_option_courses():

    driver = utils.start_driver('chrome')
    home_dir = str(pathlib.Path.home())
    session.login(driver, home_dir + '/plst.credential.json')
    utils.wait(3)

    filt_name_dict = {
        'role': 'ROLES',
        'subject': 'SUBJECTS TO LEARN',
        'tool': 'TOOLS',
        'cert': 'CERTIFICATIONS',
        # 'level': 'SKILL LEVELS',
        # 'author': 'AUTHORS',
    }

    try:

        for filt, filt_name in sorted(filt_name_dict.items()):
            opt_url_dict = utils.load_json(
                'search/filt_{}_urls.json'.format(filt))
            out_dir = 'search/filt_{}_courses'.format(filt)
            os.makedirs(out_dir, exist_ok=True)

            opt_index = 0
            nopt = len(opt_url_dict)
            for opt, url in sorted(opt_url_dict.items()):
                opt_index += 1
                fname_json = '{}/{}.json'.format(out_dir, opt_index)
                if os.path.isfile(fname_json):
                    continue
                # if opt_index >= 10:
                #     break
                utils.print_message(
                    'get all courses with filt={}, option={} ({}/{})'.format(
                        filt, opt, opt_index, nopt))
                course_id_list = search.get_all_courses_per_option(
                    driver, url, wait_time=10)
                opt_courses_dict = {opt: course_id_list}
                utils.save_json(opt_courses_dict, fname_json)
                utils.wait(20)

    finally:
        session.logout(driver)
        utils.wait(3)
        utils.close_driver(driver)
Example #37
def conv_knn(embeddings: Matrix, convs: Matrix, dataset: Dataset,
             num_trees: int, num_nns: int, save_loc: str,
             cache_index: bool) -> None:
    index = np.dot(convs.transpose(0, 2, 1), embeddings.transpose(1, 0))
    sorted_idxs = np.argsort(index, axis=-1)

    def parse_vec(seq_id, filter_id):
        v = convs[seq_id, :, filter_id]
        idxs = sorted_idxs[seq_id, filter_id][-num_nns:]
        words = dataset.decode(idxs, keep_first=True)
        return {
            'words': words,
            'norm': float(np.sqrt(np.sum(v**2))),
        }

    utils.save_json([[parse_vec(i, j) for i in range(convs.shape[0])]
                     for j in range(convs.shape[2])], save_loc, 'knns.json')
Example #38
    def on_query_completions(self, view, prefix, locations):
        print(utils.get_language())
        lang = utils.get_language()
        if lang != "javascript" and lang != "nodejs":
            return
        if not utils.is_point():
            return

        jsonPath = sublime.packages_path() + os.sep + "javascript" + os.sep + "functions.json"
        if lang == "nodejs":
            jsonPath = sublime.packages_path() + os.sep + "javascript" + os.sep + "functions_node.json"

        d = utils.load_json(jsonPath)
        obj = utils.get_word(-1)
        if not d.get(obj):
            d[obj] = []
            utils.save_json(jsonPath, d)
            return

        functions = d[obj]
        return utils.get_completion_list(functions)
Example #39
 def run(self):
     jsonDiff = utils.load_json(DIFF_JSON)
     window = sublime.active_window()
     view = window.active_view()
     text = utils.get_text()
     filename = view.file_name()
     rutaCarpeta = get_folder(filename)
     utils.create_folder_if_not_exist(rutaCarpeta)
     nombreArchivo = time.strftime("%Y%m%d%H%M%S")
     lista = os.listdir(rutaCarpeta)
     escribir = True
     if lista:
         ultimo = max(lista)
         if filecmp.cmp(rutaCarpeta + os.sep + ultimo, filename):
             escribir = False
     if escribir:
         print("saving version...")
         rutaArchivo = rutaCarpeta + os.sep + nombreArchivo
         shutil.copyfile(filename, rutaArchivo)
         jsonDiff[filename] = nombreArchivo
         utils.save_json(DIFF_JSON, jsonDiff)
Example #40
	def processClass(self, ruta):
#		print("processing: " + ruta)
#		time.sleep(0.1)
		self.i += 1
		sublime.status_message(str(self.i) + " classes processed")
		proceso = Popen("javap " + ruta, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
		salida = proceso.communicate()[0].decode("utf-8")
		superClases = re.findall(r"class\s+[\w.]+\s+extends\s+([\w.]+)", re.sub(r"<[^>]+>", "", salida), flags=re.IGNORECASE)
		if superClases:
#			print("it has a superclass: " + superClases[0])
			self.processClass(PATH_CLASSES + "/" + superClases[0].replace(".", "/"))
#		os.remove(ruta)
		json = self.getJson(salida)
		ruta = ruta.replace(PATH_CLASSES, PATH_JSON).strip()[:-5] + "json"
		if superClases:
			pathJsonSuperClass = PATH_JSON + "/" + superClases[0].replace(".", "/") + ".json"
#			print("the json path is: " + pathJsonSuperClass)
			if os.path.exists(pathJsonSuperClass):
#				print("about to merge")
				jsonSuperClass = utils.load_json(pathJsonSuperClass)
				json["clase"] = list(set(json["clase"] + jsonSuperClass["clase"]))
				json["object"] = list(set(json["object"] + jsonSuperClass["object"]))
		utils.save_json(ruta, json)
Example #41
def make_config_dir(args, luna2d_path):
	config_dir = args.project_path + "/.luna2d"
	os.makedirs(config_dir)

	build_config = {
		"projectName" : args.name,
		"platform" : args.platform,
		"gamePath" : utils.normalize_slashes(os.path.relpath(args.game_path, args.project_path)),
		"luna2dPath" : utils.normalize_slashes(luna2d_path),
	}

	utils.save_json(build_config, config_dir + "/build.luna2d")

	if not args.strip_git:
		with open(config_dir + "/.gitignore", "w") as file:
			file.writelines(
				[
					"cache/*\n",
					"assets/*\n",
					"libs/*\n",
				])

	shutil.copyfile(utils.get_scripts_path() + "/update.py", config_dir + "/update.py")
Example #42
    def on_pre_save(self, view):
        lang = utils.get_language()
        if lang != "javascript" and lang != "nodejs":
            return
        text = utils.get_text()

        text = re.sub(r"\$\([\"'.\w#-]*\)", "jQuery", text)
        functions = re.findall(r"([$A-Za-z]+)\.(\w+)\(", text)

        jsonPath = sublime.packages_path() + os.sep + "javascript" + os.sep + "functions.json"
        if lang == "nodejs":
            jsonPath = sublime.packages_path() + os.sep + "javascript" + os.sep + "functions_node.json"

        d = utils.load_json(jsonPath)
        for function in functions:
            key = function[0]
            if key == "$scope":
                continue
            value = function[1] + "()"
            if not d.get(key):
                d[key] = []
            if value not in d[key]:
                d[key].append(value)
        utils.save_json(jsonPath, d)
Example #43
		def new_func(*args, **kwargs):
			res = func(*args, **kwargs)
			save_json(_path + '%s.json' % name, res)
			return res
Example #44
                xprint('#' * 80)
                clf_str = str(get_clf())
                xprint(clf_str)
                if clf_str in completed_tests:
                    xprint('skipping')
                    continue
                set_random_seed(1234)
                evaluator = Evaluator(n=1)
                ok, auc0 = evaluator.evaluate(get_clf)
                auc_list.append((auc0, get_clf.__name__, str(get_clf())))
                results = [(i, auc, clf, clf_str) for i, (auc, clf, clf_str) in enumerate(auc_list)]
                results.sort(key=lambda x: (-x[1].mean(), x[2], x[3]))
                xprint('~' * 100)
                xprint('RESULTS SO FAR: %d' % len(results))
                for i, auc, clf, clf_str in results:
                    xprint('$' * 100)
                    xprint('auc=%.4f %3d: %s %s' % (auc.mean(), i, clf, clf_str))
                    show_auc(auc)
                xprint('^' * 100)
                xprint('RESULTS SUMMARY: %d' % len(results))
                for i, auc, clf, clf_str in results:
                    xprint('auc=%.4f %3d: %s %s' % (auc.mean(), i, clf, clf_str))

                completed_tests.add(clf_str)
                run_summary['completed'] = sorted(completed_tests)
                save_json(run_summary_path, run_summary)
                xprint('n_completed=%d' % len(completed_tests))
                xprint('&' * 100)

xprint('$' * 100)
Example #45
 def save_dict(self, d, fname):
     save_json(d, fname)
     self.saver.sync()
Example #46
                if len(runs) > n_runs0:
                    xprint('skipping runs=%d n_runs0=%d' % (len(runs), n_runs0))
                    continue

                set_random_seed(random_seed + n_runs0)
                evaluator = Evaluator(n=1)
                ok, auc_reductions, best_method = evaluator.evaluate_reductions(get_clf,
                    PREDICT_METHODS_GOOD)
                assert ok

                for predict_method in sorted(auc_reductions):
                    auc = auc_reductions[predict_method]
                    xprint('<->.' * 25)
                    xprint('predict_method=%s' % predict_method)
                    if predict_method == 'BEST':
                        xprint('best_method=%s' % best_method)
                    assert (auc > 0.0).all(), auc

                    auc_list.append((auc, get_clf.__name__, str(get_clf())))
                    show_results(auc_list)

                    runs.append(auc_score_list(auc))
                    completed_tests[str(get_clf())] = runs
                    save_json(run_summary_path, completed_tests)
                    xprint('n_completed=%d = %d + %d' % (len(completed_tests), n_completed0,
                        len(completed_tests) - n_completed0))
                xprint('&' * 100)

touch('completed.spacy_lstm130_flip.txt')
xprint('$' * 100)
Example #47
    threshold = 1000 if MAKE_CATEGORICALS else 20
    col_level, _, single_val_cols, enumerations = compute_categoricals(ORDERS_NAME, orders,
        threshold=threshold)
    for i, col in enumerate(sorted(col_level, key=lambda k: (-col_level[k], k))):
        print('%3d: %30s %6d %5.1f%%' % (i, col, col_level[col],
              100.0 * col_level[col] / len(orders)))
    print('col_level_type = [')
    for i, col in enumerate(sorted(col_level, key=lambda k: (-col_level[k], k))):
        print('    (%30r, %6d, %8s),  # %3d  %5.1f%%' % (
              col, col_level[col], orders[col].dtype,
              i, 100.0 * col_level[col] / len(orders)
              ))
    print(']')
    print('single_val_cols=%d %s' % (len(single_val_cols), single_val_cols))
    if MAKE_CATEGORICALS:
        save_json(CATEGORY_ENUMERATIONS, enumerations)
        assert False

if QUOTE_SALE:
    customer_col = 'emailAddress'
    customer_col = 'Customer Organisation'
    # customer_col = 'customerId'
    # customer_col = 'endCustomerId'
    # customer_col = 'clientPurchaseOrderNumber'
    # customer_col = 'electedPaymentMethod'

    customers = orders[customer_col]
    unique_customers = set(customers)
    unique_types = {type(cust) for cust in customers}
    float_customers = sorted({cust for cust in customers if isinstance(cust, float)})
Example #48
def save_test_urls(urls):
	save_json('test-history.urls', urls)
Example #49
 def save(self, name):
     if name is None:
         return
     print("before: " + self.rutaSamples)
     samples = utils.load_json(self.rutaSamples)
     samples[name] = utils.get_text()
     utils.save_json(self.rutaSamples, samples)
Example #50
 def save_settings():
     utils.save_json(settings_fn, settings)
Example #51
def make_mongo():
	best = MongoBest(conf.mongo_web)
	cates = {}
	tpls = {}
	domains = {}
	tree = URLTree()

	for cate in best.catecory.find():
		obj = {}
		obj['_id'] = hashlib.md5(cate['_id'].encode('utf-8')).hexdigest()
		obj['url'] = cate['_id']
		obj['name'] = cate['name']
		obj['tags'] = filter(lambda x: x, cate['tag'].split('|'))
		obj['cate'] = cate['cate']
		obj['domain'] = get_domain(obj['url'])
		obj['page'] = ''
		obj['all'] = False
		obj['arts'] = 0
		obj['index'] = -1
		obj['log'] = []
		obj['fetch'] = 0
		obj['null'] = 0
		obj['error'] = 0
		obj['status'] = 'common'
		obj['next'] = 0
		obj['last'] = 0
		cates[obj['_id']] = obj

		tree.add(obj)

		if obj['domain'] not in domains:
			domains[obj['domain']] = {
				'_id':obj['domain'], 
				'name':'',
				'link': 'http://www.%s/' % obj['domain'],
				'cates':0,
				'tpls':0,
				'arts':0,
				'articles':0,
				'status':'common',
				'last':0,
			}

		subdomains = get_subdomains(obj['url'])
		if len(subdomains) == 1 and subdomains[0] in ['www', ''] \
				and get_path(obj['url']) in ['', '/']:
			domains[obj['domain']]['name'] = obj['name']
		domains[obj['domain']]['cates'] += 1

	tree.refresh()

	# for cate in cates.itervalues():
	# 	print '%-120s %s' % (cate['_id'], cate['name'])

	for tpl in best.template.find():
		tpl['domain'] = get_domain(tpl['_id'])
		tpl['arts'] = 0
		tpl['articles'] = 0
		tpl['status'] = 'common'
		tpl['last'] = 0
		if tpl['domain'] not in domains:
			print 'tpl(%s) not in domain(%s)' % (tpl['_id'], tpl['domain'])
			continue
		tpls[tpl['_id']] = tpl
		domains[tpl['domain']]['tpls'] += 1

	save_json('spider/domains.json', domains)
	save_json('spider/cates.json', cates)
	save_json('spider/tpls.json', tpls)
Example #52
from twitter import TwitterStream  # Python Twitter Tools (assumed source of TwitterStream)
from utils import oauth_login, save_json

track = "Patriots"  # Tweets for Patriots

TOTAL_TWEETS = 2500

patriots = []
patriots_counter = 0

while patriots_counter < TOTAL_TWEETS:  # collect tweets until TOTAL_TWEETS have been gathered
    # Create a stream instance
    auth = oauth_login(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET,
                       token=OAUTH_TOKEN, token_secret=OAUTH_TOKEN_SECRET)
    twitter_stream = TwitterStream(auth=auth)
    stream = twitter_stream.statuses.filter(track=track)
    counter = 0
    for tweet in stream:
        if patriots_counter == TOTAL_TWEETS:
            print 'break'
            break
        elif counter % 500 == 0 and counter != 0:
            print 'get new stream'
            break
        else:
            patriots.append(tweet)
            patriots_counter += 1
            counter += 1
            print patriots_counter, counter

save_json('json/patriots', patriots)
Example #53
                    broncos.append(tweet), panthers.append(tweet)
                    broncos_counter += 1
                    panthers_counter += 1
                    print 'Panthers: %s, Broncos: %s' % (panthers_counter, broncos_counter)
                elif 'Broncos' in tweet['text']:
                    broncos.append(tweet)
                    broncos_counter += 1
                    print 'Broncos: %s' % broncos_counter
                elif 'Panthers' in tweet['text']:
                    panthers.append(tweet)
                    panthers_counter += 1
                    print 'Panthers: %s' % panthers_counter
                else:
                    print 'continue'
                    continue
        count_dict['broncos'] = broncos_counter
        count_dict['panthers'] = panthers_counter
        count_dict['end_time'] = datetime.now().strftime(format=date_format)
        counts.append(count_dict)

    print counts
    if counter != NUMBER_OF_COLLECTIONS:
        print 'Sleeping until %s' % (datetime.now() + timedelta(minutes=WAIT_TIME))
        sleep(WAIT_TIME * 60)
    else:
        print '------------------------------------------'

save_json('json/counts', counts)
save_json('json/broncos', broncos)
save_json('json/panthers', panthers)
Example #54
 def guardarConfiguraciones(self):
     utils.save_json(sublime.packages_path()+"/plugins/configuraciones.json", self.configuraciones)
Example #55
def repeat_simulation(varying_parameter, value_range, nb_replicates=3, max_integration_attempt=10,
                      value_range_start=0, save_directory="data"):

    sleeping_time = 30

    if not os.path.isfile("{}/{}.json".format(save_directory, varying_parameter)):

        json_dict = {"parameters": None, "specificity": None, "sensitivity": None,
                     "varying_parameter": varying_parameter,
                     "nb_replicates": nb_replicates}

        if isinstance(value_range, list):
            json_dict["value_range"] = value_range
        else:
            json_dict["value_range"] = value_range.tolist()


        logging.info("Creating new file: {}.json".format(varying_parameter))
        save_json(json_dict, varying_parameter)

        specificity = np.zeros((nb_replicates, len(value_range)))
        sensitivity = np.zeros((nb_replicates, len(value_range)))

    else:
        with open("{}/{}.json".format(save_directory, varying_parameter)) as json_file:
            input_json = json.load(json_file)

        sensitivity = np.array(input_json["sensitivity"])
        specificity = np.array(input_json["specificity"])

    for i, parameter_value in enumerate(value_range):
        if i >= value_range_start:
            for replicate in xrange(nb_replicates):

                info_message = "Starting new simulation with varying parameter: {}, value: {}, replicate {}/{}".format(
                    varying_parameter, parameter_value, replicate + 1, nb_replicates)
                logging.info(info_message)

                for integration_attempt in xrange(max_integration_attempt):
                    logging.info("Integration attempt {}/{}".format(integration_attempt+1, max_integration_attempt))
                    try:
                        specificity[replicate, i], sensitivity[replicate, i], parameters = start_simulation(
                            **{varying_parameter: parameter_value})

                    except (IntegrationError, SteadyStateError):
                        logging.warning("Integration failed. Starting over...")

                    else:
                        break

                    if integration_attempt == max_integration_attempt - 1:
                        error_text = ("The maximum number of integration attempts was reached: {}\n"
                                      "Ending program...").format(max_integration_attempt)

                        logging.error(error_text)
                        raise IntegrationError(error_text)

                with open("{}/{}.json".format(save_directory, varying_parameter)) as json_file:
                    output = json.load(json_file)

                output["specificity"] = specificity.tolist()
                output["sensitivity"] = sensitivity.tolist()
                output["parameters"] = parameters

                logging.info("Saving new data in JSON file: {}".format(varying_parameter))
                save_json(output, varying_parameter)

                logging.info("Sleeping for {} seconds...".format(sleeping_time))
                time.sleep(sleeping_time)
Example #56
def main():
    r = []

    r.extend(plot("ground", num_filled=1, freq=1.0, fit_start=6))
    r.extend(plot("ground", num_filled=1, freq=0.5, fit_start=6))
    r.extend(plot("ground", num_filled=1, freq=0.28, fit_start=6))
    r.extend(plot("ground", num_filled=1, freq=0.1, fit_start=8))
    r.extend(plot("ground", num_filled=2, freq=1.0, fit_start=6))
    r.extend(plot("ground", num_filled=2, freq=0.5, fit_start=6))
    r.extend(plot("ground", num_filled=2, freq=0.28, fit_start=6))
    r.extend(plot("ground", num_filled=2, freq=0.1, fit_start=10))
    r.extend(plot("ground", num_filled=3, freq=1.0, fit_start=9))
    r.extend(plot("ground", num_filled=3, freq=0.5, fit_start=9))
    r.extend(plot("ground", num_filled=3, freq=0.28, fit_start=11))
    r.extend(plot("ground", num_filled=3, freq=0.1, fit_start=11))
    r.extend(plot("ground", num_filled=4, freq=1.0, fit_start=10))
    r.extend(plot("ground", num_filled=4, freq=0.5, fit_start=12,
                  fit_ranges={"hf": [10, 99999]}))
    r.extend(plot("ground", num_filled=4, freq=0.28, fit_start=14))
    r.extend(plot("ground", num_filled=4, freq=0.1, fit_start=11))
    r.extend(plot("ground", num_filled=5, freq=1.0, fit_start=13))
    r.extend(plot("ground", num_filled=5, freq=0.5, fit_start=14))
    r.extend(plot("ground", num_filled=5, freq=0.28, fit_start=17))
    r.extend(plot("ground", num_filled=5, freq=0.1, fit_start=11))
    r.extend(plot("ground", num_filled=6, freq=1.0, fit_start=16))
    r.extend(plot("ground", num_filled=6, freq=0.28, fit_start=15))
    r.extend(plot("ground", num_filled=6, freq=0.1, fit_start=15,
                  badness_threshold=0.0))

    r.extend(plot("add", num_filled=1, freq=1.0, fit_start=7))
    r.extend(plot("add", num_filled=1, freq=0.5, fit_start=7))
    r.extend(plot("add", num_filled=1, freq=0.28, fit_start=7))
    r.extend(plot("add", num_filled=2, freq=1.0, fit_start=9))
    r.extend(plot("add", num_filled=2, freq=0.5, fit_start=9))
    r.extend(plot("add", num_filled=2, freq=0.28, fit_start=10))
    r.extend(plot("add", num_filled=2, freq=0.1, fit_start=10))
    r.extend(plot("add", num_filled=3, freq=1.0, fit_start=13))
    r.extend(plot("add", num_filled=3, freq=0.5, fit_start=10))
    r.extend(plot("add", num_filled=3, freq=0.28, fit_start=13))
    r.extend(plot("add", num_filled=3, freq=0.1, fit_start=10))
    r.extend(plot("add", num_filled=4, freq=1.0, fit_start=13))
    r.extend(plot("add", num_filled=4, freq=0.28, fit_start=13))
    r.extend(plot("add", num_filled=4, freq=0.1, fit_start=11))
    r.extend(plot("add", num_filled=5, freq=1.0, fit_start=10))
    r.extend(plot("add", num_filled=5, freq=0.5, fit_start=10))
    r.extend(plot("add", num_filled=5, freq=0.28, fit_start=10))
    r.extend(plot("add", num_filled=5, freq=0.1, fit_start=10))

    r.extend(plot("rm", num_filled=1, freq=1.0, fit_start=7))
    r.extend(plot("rm", num_filled=1, freq=0.5, fit_start=7))
    r.extend(plot("rm", num_filled=1, freq=0.28, fit_start=7,
                  fit_ranges={"hf+qdpt3": [12, 99999]}))
    r.extend(plot("rm", num_filled=2, freq=1.0, fit_start=10))
    r.extend(plot("rm", num_filled=2, freq=0.5, fit_start=7))
    r.extend(plot("rm", num_filled=2, freq=0.28, fit_start=10))
    r.extend(plot("rm", num_filled=2, freq=0.1, fit_start=9))
    r.extend(plot("rm", num_filled=3, freq=1.0, fit_start=11))
    r.extend(plot("rm", num_filled=3, freq=0.5, fit_start=9))
    r.extend(plot("rm", num_filled=3, freq=0.28, fit_start=11))
    r.extend(plot("rm", num_filled=3, freq=0.1, fit_start=9))
    r.extend(plot("rm", num_filled=4, freq=1.0, fit_start=13))
    r.extend(plot("rm", num_filled=4, freq=0.5, fit_start=10))
    r.extend(plot("rm", num_filled=4, freq=0.28, fit_start=10))
    r.extend(plot("rm", num_filled=4, freq=0.1, fit_start=10))
    r.extend(plot("rm", num_filled=5, freq=1.0, fit_start=13))
    r.extend(plot("rm", num_filled=5, freq=0.5, fit_start=10))
    r.extend(plot("rm", num_filled=5, freq=0.28, fit_start=10))
    r.extend(plot("rm", num_filled=5, freq=0.1, fit_start=11))

    utils.save_json("fit_results.json", r)
Example #57
if False:
    s = '''It turns out "why is" that's using. Doesn't it? Can't i'''
    t = tokenize(s)
    print(t)
    assert False

print('Tokenization:')
t0 = time.clock()
train_tokens = [tokenize(s, token_vector) for s in train[COMMENT]]
print('train_tokens: %1.f sec %.2f sec / token' % (time.clock() - t0, (time.clock() - t0) / len(train_tokens)))
t0 = time.clock()
test_tokens = [tokenize(s, token_vector) for s in test[COMMENT]]
print('test_tokens: %1.f sec %.2f sec / token' % (time.clock() - t0, (time.clock() - t0) / len(test_tokens)))

save_pickle('token.vector.pkl', token_vector)
save_json('train.tokens.json', train_tokens)
save_json('test.tokens.json', test_tokens)

token_vector = load_pickle('token.vector.pkl')
train_tokens = load_json('train.tokens.json')
test_tokens = load_json('test.tokens.json')


def compute_ngram_vector(token_list, n):
    """Compute an embedding vector for all n-grams in token_list
    """
    vec = np.zeros((n, SPACY_VECTOR_SIZE), dtype=np.float64)
    n_vecs = len(token_list) - n + 1
    for i in range(n_vecs):
        for j in range(n):
            vec[j] += token_vector[token_list[i + j]]
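The snippet above ends mid-function. For orientation, a self-contained toy version of the same n-gram accumulation, with a made-up vocabulary and vector size; the final normalization is one plausible way to finish the function, not the original author's:

import numpy as np

VECTOR_SIZE = 4  # stand-in for SPACY_VECTOR_SIZE
token_vector = {"the": np.ones(VECTOR_SIZE),
                "cat": np.full(VECTOR_SIZE, 2.0),
                "sat": np.full(VECTOR_SIZE, 3.0)}

def ngram_vector(token_list, n):
    # Sum the j-th token of every n-gram into row j, as in the truncated example.
    vec = np.zeros((n, VECTOR_SIZE))
    n_vecs = len(token_list) - n + 1
    for i in range(n_vecs):
        for j in range(n):
            vec[j] += token_vector[token_list[i + j]]
    return vec.flatten() / max(n_vecs, 1)  # plausible normalization (assumption)

print(ngram_vector(["the", "cat", "sat"], 2))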
Example #58
	def run(self):
		self.packages={}
		self.explore(PATH_JSON)
		utils.save_json(PATH_INDEX_PACKAGES, self.packages)
		utils.save_json(PATH_INDEX_CLASSES, list(self.packages.keys()))
Example #59
def clean_test_urls():
	save_json('test-history.urls', [])