def load_whole(mdir):
    obj_path = os.path.join(mdir, 'args.pkl')
    assert os.path.exists(obj_path)
    args = utils.load_obj(obj_path)
    cdir = args.cdir
    ft_names = ['1-gram', '2-gram', '3-gram', '4-gram', 'unicode-block', 'word']
    ft_extractors = {name: None for name in ft_names}
    for name in ft_extractors:
        cache_path = os.path.join(cdir, f'{name}.pkl')
        assert os.path.exists(cache_path)
        ft_extractors[name] = utils.load_obj(cache_path)
    cache_path = os.path.join(cdir, 'lang.pkl')
    assert os.path.exists(cache_path)
    LANG = utils.load_obj(cache_path)
    mdl = FeedforwardNetwork(args, ft_extractors, LANG)
    fmdl = os.path.join(mdir, 'mdl.pkl')
    mdl.load_state_dict(torch.load(fmdl, map_location=torch.device('cpu')))
    mdl.eval()
    utils.log(f'Loaded model from {fmdl}')
    iso_639_4 = pd.read_csv('ISO-639-4.csv', sep='\t')
    lang2label = {row.iso: row.label for idx, row in iso_639_4.iterrows()}
    utils.log('Loaded ISO-639-4')
    return ft_extractors, LANG, cdir, mdl, lang2label
def Q1_3():
    hashtags = ['#gohawks', '#nfl', '#sb49', '#gopatriots', '#patriots', '#superbowl']
    for tag in hashtags:
        X = load_obj(tag + '_Q13')[:-1, :]
        y = load_obj(tag + '_numTweetsInHour')[1:]
        model = stats_api.OLS(y, X)
        res = model.fit()
        y_pred = res.predict(X)
        y_resid = y - y_pred
        sum_err = pow(y_resid, 2)
        sum_err = np.sum(sum_err)
        print(res.summary())
        # print(sum_err)
        rmse = sqrt(sum_err / len(y_resid))
        print('%s has RMSE of %.3f' % (tag, rmse))
        features = ['mentionCount', 'rankScore', 'passitivity',
                    'co-occurrence_of_tags', 'unique_author']
        for i in [0, 2, 3]:
            x_plt = X[:, i]
            ys = [[y, 'Predictant']]
            x_label = features[i]
            y_label = 'number of tweets for next hour'
            title = tag + ', ' + x_label
            make_plot(x_plt, ys, scatter=True, xlabel=x_label, ylabel=y_label, title=title)
        print('=============================')
def configure_optimizers(self):
    if 'decoder_lr' in self.cfg.optimizer.params.keys():
        params = [
            {'params': self.model.decoder.parameters(), 'lr': self.cfg.optimizer.params.lr},
            {'params': self.model.encoder.parameters(), 'lr': self.cfg.optimizer.params.decoder_lr},
        ]
        optimizer = load_obj(self.cfg.optimizer.class_name)(params)
    else:
        optimizer = load_obj(self.cfg.optimizer.class_name)(
            self.model.parameters(), **self.cfg.optimizer.params)
    scheduler = load_obj(self.cfg.scheduler.class_name)(
        optimizer, **self.cfg.scheduler.params)
    return [optimizer], [{
        'scheduler': scheduler,
        'interval': self.cfg.scheduler.step,
        'monitor': self.cfg.scheduler.monitor,
    }]
def get_datasets(period_params, symbols_list_name, thresholds_lst, target_shift,
                 mode='all', datasets=None):
    print("Initializing datasets for periods: %s" % period_params)
    if not datasets:
        datasets = {}
    for thresholds in thresholds_lst:
        for resample_period, magic_number in period_params:
            normal_name, z_name = get_datasets_name(resample_period, symbols_list_name,
                                                    thresholds, target_shift)
            normal_file = os.path.join(DATA_PATH, normal_name)
            z_file = os.path.join(DATA_PATH, z_name)
            if exists_obj(normal_file) and exists_obj(z_file):
                print("Loading from cache:\n * %s\n * %s" % (normal_file, z_file))
                dfn = load_obj(normal_file)
                dfz = load_obj(z_file)
            else:
                dfn, dfz = get_data(resample_period=resample_period,
                                    symbols_list_name=symbols_list_name,
                                    thresholds=thresholds,
                                    target_shift=target_shift)
            if mode == 'all' or mode == 'normal':
                datasets[normal_name] = (dfn, magic_number, thresholds)
            if mode == 'all' or mode == 'z-score':
                datasets[z_name] = (dfz, magic_number, thresholds)
    return datasets
def configure_optimizers(self): """TODO Add missing docstring.""" if "decoder_lr" in self.cfg.optimizer.params.keys(): params = [ { "params": self.model.decoder.parameters(), "lr": self.cfg.optimizer.params.lr, }, { "params": self.model.encoder.parameters(), "lr": self.cfg.optimizer.params.decoder_lr, }, ] optimizer = load_obj(self.cfg.optimizer.class_name)(params) else: optimizer = load_obj(self.cfg.optimizer.class_name)( self.model.parameters(), **self.cfg.optimizer.params) scheduler = load_obj(self.cfg.scheduler.class_name)( optimizer, **self.cfg.scheduler.params) return ( [optimizer], [{ "scheduler": scheduler, "interval": self.cfg.scheduler.step, "monitor": self.cfg.scheduler.monitor, }], )
def configure_optimizers(self, *args, **kwargs):
    opt = self.conf.optimizer.class_name
    self.optimizer = load_obj(opt)(self.net.parameters(), **self.conf.optimizer.params)
    if self.conf.scheduler.class_name is None:
        return [self.optimizer]
    else:
        schedps = self.conf.scheduler
        __scheduler = load_obj(schedps.class_name)(self.optimizer, **schedps.params)
        if not self.conf.scheduler.monitor:
            self.scheduler = {
                "scheduler": __scheduler,
                "interval": schedps.interval,
                "frequency": schedps.frequency,
            }
        else:
            self.scheduler = {
                "scheduler": __scheduler,
                "interval": schedps.interval,
                "frequency": schedps.frequency,
                "monitor": schedps.monitor,
            }
        return [self.optimizer], [self.scheduler]
def load_index(self, fn):
    """
    Loads a pre-computed index (or indices) so we can answer queries.
    Input:
        fn - file name of the pickled index, read from disk.
    """
    utils.load_obj(fn)
def merge_index(config, files_num):
    """
    Loads all the temporary index files written by the parse_and_index function and
    merges them into a single unified index.
    The function applies the capital-letters rule: if every occurrence of a term starts
    with a capital letter, the term is saved in all caps; otherwise it is saved in its
    lowercase form. It also merges entities into the inverted index when they appear in
    the corpus more than once, and saves the merged index to disk for future use.
    :param config: config object that says where to find the saved files
    :param files_num: how many temporary files to merge in each category
    :return: total number of terms in the index
    """
    merged_index = {}
    # Merge all the terms from the temporary indices into one index
    file_prefix = config.get_save_files_dir() + "/tmp/inverted_idx_"
    for i in range(files_num):
        current_index = utils.load_obj(file_prefix + str(i))
        for term, apperances in current_index.items():
            if term not in merged_index.keys():
                merged_index[term] = apperances
            else:
                merged_index[term] += apperances

    # Handle the capital-letters restriction
    merged_index_after_cap = {}
    for term, value in merged_index.items():
        if term[0].islower():
            if term not in merged_index_after_cap.keys():
                merged_index_after_cap[term] = value
            else:
                merged_index_after_cap[term] += value
        else:  # the term starts with an uppercase letter
            if term.lower() in merged_index.keys():  # the same term appears in lowercase somewhere in the corpus
                if term.lower() not in merged_index_after_cap.keys():
                    merged_index_after_cap[term.lower()] = value
                else:
                    merged_index_after_cap[term.lower()] += value
            else:  # the term only ever appears capitalized
                merged_index_after_cap[term.upper()] = value

    # An entity that appears more than once in the corpus is added to the index
    entities_idxs_prefix = config.get_save_files_dir() + "/tmp/entities_idx_"
    for i in range(files_num):
        current_entities = utils.load_obj(entities_idxs_prefix + str(i))
        for term, apperances in current_entities.items():
            if apperances > 1:
                merged_index_after_cap[term] = apperances

    total_terms = len(merged_index)
    # print("Total num of terms: {}".format(total_terms))

    # Save the merged index to disk
    saving_dir = config.get_save_files_dir()
    utils.save_obj(merged_index_after_cap, saving_dir + "/inverted_index")
    return total_terms
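# Illustration (not part of the original module): a minimal, self-contained sketch of
# the capital-letters rule applied in merge_index above. A term that appears anywhere
# in lowercase is merged under its lowercase key; a term that only ever appears
# capitalized is stored in all caps. The term names and counts below are invented.
def _demo_capital_rule():
    merged_index = {'apple': 3, 'Apple': 2, 'NASA': 4}
    resolved = {}
    for term, count in merged_index.items():
        if term[0].islower():
            resolved[term] = resolved.get(term, 0) + count
        elif term.lower() in merged_index:  # lowercase form exists somewhere in the corpus
            resolved[term.lower()] = resolved.get(term.lower(), 0) + count
        else:  # only ever seen capitalized
            resolved[term.upper()] = count
    assert resolved == {'apple': 5, 'NASA': 4}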
def get_training_dataset(cfg: DictConfig = None) -> Dict[str, Dataset]:
    """
    Get training and validation datasets.

    Parameters
    ----------
    cfg : DictConfig, optional
        Project configuration, by default None

    Returns
    -------
    Dict[str, Dataset]
        {"train": train_dataset, "valid": valid_dataset}
    """
    images_dir = to_absolute_path(cfg.data.images_folder_path)
    data = pd.read_csv(to_absolute_path(cfg.data.dataset_path))
    data["x1"] = data["x"] + data["w"]
    data["y1"] = data["y"] + data["h"]
    data["area"] = data["w"] * data["h"]

    train_ids, valid_ids = train_test_split(
        data["image_id"].unique(),
        test_size=cfg.data.validation_split,
        random_state=cfg.training.seed,
    )

    # for fast training
    if cfg.training.debug:
        train_ids = train_ids[:10]
        valid_ids = valid_ids[:10]

    train_df = data.loc[data["image_id"].isin(train_ids)]
    valid_df = data.loc[data["image_id"].isin(valid_ids)]

    train_augs_list = [
        load_obj(i["class_name"])(**i["params"])
        for i in cfg["augmentation"]["train"]["augs"]
    ]
    train_bbox_params = OmegaConf.to_container(cfg["augmentation"]["train"]["bbox_params"])
    train_augs = Compose(train_augs_list, bbox_params=train_bbox_params)

    valid_augs_list = [
        load_obj(i["class_name"])(**i["params"])
        for i in cfg["augmentation"]["valid"]["augs"]
    ]
    valid_bbox_params = OmegaConf.to_container(cfg["augmentation"]["valid"]["bbox_params"])
    valid_augs = Compose(valid_augs_list, bbox_params=valid_bbox_params)

    train_dataset = XrayDataset(train_df, "train", images_dir, cfg, train_augs)
    valid_dataset = XrayDataset(valid_df, "valid", images_dir, cfg, valid_augs)

    return {"train": train_dataset, "valid": valid_dataset}
def __init__(self, args):
    device = torch.device(args.gpu if args.gpu != -1 else 'cpu')
    self.device = device
    ftrain = args.ftrain
    fvalid = args.fvalid
    ftest = args.ftest
    futable = args.futable
    bsz = args.bsz
    cdir = args.cdir

    train, valid, test = self.load_data(ftrain), \
                         self.load_data(fvalid), \
                         self.load_data(ftest)

    ft_extractors = {f'{n}-gram': NgramFeature(n, vsize)
                     for n, vsize in zip([1, 2, 3, 4],
                                         [args.vsizes[VSIZE_1GRAM],
                                          args.vsizes[VSIZE_2GRAM],
                                          args.vsizes[VSIZE_3GRAM],
                                          args.vsizes[VSIZE_4GRAM]])}
    ft_extractors['unicode-block'] = UnicodeBlockFeature()
    ft_extractors['word'] = WordFeature(args.vsizes[VSIZE_WORD])

    for name in ft_extractors:
        cache_path = os.path.join(cdir, f'{name}.pkl')
        if os.path.exists(cache_path):
            ft_extractors[name] = utils.load_obj(cache_path)
        else:
            utils.log(f'Building feature {name}')
            if 'gram' in name:
                ft_extractors[name].build(train.txt)
            elif name == 'unicode-block':
                ft_extractors[name].build(futable)
            elif name == 'word':
                ft_extractors[name].build(train.txt)
            else:
                raise NotImplementedError
            utils.save_obj(ft_extractors[name], cache_path)

    cache_path = os.path.join(cdir, 'lang.pkl')
    LANG = Lang()
    if os.path.exists(cache_path):
        LANG = utils.load_obj(cache_path)
    else:
        utils.log('Building LANG')
        LANG.build(train.lang)
        utils.save_obj(LANG, cache_path)

    utils.log('Building batches')
    self.train_iter, _ = self.build_batches(train, cdir, 'train', ft_extractors,
                                            bsz, LANG, True, device)
    self.valid_iter, _ = self.build_batches(valid, cdir, 'valid', ft_extractors,
                                            bsz, LANG, False, device)
    self.test_iter, _ = self.build_batches(test, cdir, 'test', ft_extractors,
                                           bsz, LANG, False, device)
    self.ft_extractors = ft_extractors
    self.LANG = LANG
def expand_query(self, query_as_list):
    new_query_list = []
    embedding_dict = utils.load_obj("embedding_dict")
    new_embedding_dict = utils.load_obj("new_embedding_dict")
    for term in query_as_list:
        if term in embedding_dict.keys():
            new_query_list.extend(
                find_closest_embeddings(embedding_dict[term], 4, new_embedding_dict))
    return new_query_list
def init_embeddings(self):
    if self.one_hot_embed:
        embed_arr = utils.load_obj('datasets/context/embeddings/one_hot_33_dim')
    else:
        embed_arr = utils.load_obj('datasets/context/embeddings/norm_embed_arr_' + str(self.embed_dim))
    num_classes = embed_arr.shape[0]
    self.embeddings = torch.nn.Embedding(num_classes, self.embed_dim)
    self.embeddings.weight.requires_grad = False
    self.embeddings.weight.data.copy_(torch.from_numpy(embed_arr))
def set_precomputed_ct(self, base_obj_path, ancestor_dict_path, sample_idx_vec_path, point_num):
    self.ct = load_obj(base_obj_path)
    self.ct.__init__()
    self.point_num = 1000
    self.ct.point_num = self.point_num
    self.ct.ancestor_dict = load_obj(ancestor_dict_path)
    self.ct.sample_idx_vec_dict = load_obj(sample_idx_vec_path)
    self.ct.fidx_vec = np.array([fidx for fidx in self.ct.sample_idx_vec_dict.keys()])
def Init_model(self):
    # init dataloader
    self.data_loader = DataLoader_test(self.save_dir)
    # init model
    self.ort_session = onnxruntime.InferenceSession(self.save_dir + self.model_nm)
    # init dict
    self.idx2lbl = load_obj(self.save_dir + "idx2lbl.json")
    self.idx2cls = load_obj(self.save_dir + "idx2cls.json")
    # get valid slot for a specific intent
    self.idx_mask = load_obj(self.save_dir + "idx_mask_onnx.json")
def initialize(self, services):
    self.services = services
    self.valid_actions_getter = MyValidActionsGetter(self.services.parser,
                                                     self.services.perception)
    self.uncompleted_goals = self.services.goal_tracking.uncompleted_goals
    if os.path.exists(self.env_name + "_transitions"):
        self.transitions = load_obj(self.env_name + "_transitions")
    if os.path.exists(self.env_name + "_state_action_transition_count"):
        self.state_action_transition_count = load_obj(
            self.env_name + "_state_action_transition_count")
def get_val_slides(self, resample_round):
    patients_train = load_obj('train_img_paths_DX_round_{}'.format(resample_round),
                              self.DATA_SPLIT_DIR + 'train/')
    patients_train = list(set([p.split('/')[-1].split('.')[0][:15] for p in patients_train]))
    patients_val = load_obj('val_img_paths_DX_round_{}'.format(resample_round),
                            self.DATA_SPLIT_DIR + 'val/')
    patients_val = list(set([p.split('/')[-1].split('.')[0][:15] for p in patients_val]))
    return patients_val, patients_train
def train(cfg: DictConfig) -> None:
    """
    Run model training.

    Parameters
    ----------
    cfg : DictConfig
        Project configuration object
    """
    model = load_obj(cfg.model.backbone.class_name)
    model = model(**cfg.model.backbone.params)

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    head = load_obj(cfg.model.head.class_name)
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = head(in_features, cfg.model.head.params.num_classes)

    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    xray_detection = XrayDetection(hparams=hparams, cfg=cfg, model=model)
    callbacks = xray_detection.get_callbacks()
    loggers = xray_detection.get_loggers()

    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=callbacks["early_stopping"],
        checkpoint_callback=callbacks["model_checkpoint"],
        **cfg.trainer,
    )
    trainer.fit(xray_detection)

    # Load the best checkpoint
    get_logger().info("Saving model from the best checkpoint...")
    checkpoints = [
        ckpt for ckpt in os.listdir("./")
        if ckpt.endswith(".ckpt") and ckpt != "last.ckpt"
    ]
    best_checkpoint_path = checkpoints[0]
    model = XrayDetection.load_from_checkpoint(best_checkpoint_path,
                                               hparams=hparams, cfg=cfg, model=model)
    save_best(model, cfg)
def merge_files(self, out, letter, file_name_letter_idx):  # temp_letter_dict):
    permanent_file_name = out + letter
    file_name_letter_idx = utils.load_obj(out + file_name_letter_idx)
    permanent_dict_file = utils.load_obj(permanent_file_name)
    for key in file_name_letter_idx:
        if key in permanent_dict_file:
            permanent_dict_file[key].extend(file_name_letter_idx[key])
        else:
            permanent_dict_file[key] = file_name_letter_idx[key]
    utils.save_obj(permanent_dict_file, permanent_file_name)
def __init__(self, save_dir):
    self.save_dir = save_dir
    self.word2idx = load_obj(self.save_dir + "dict.json")
    self.config = load_obj(self.save_dir + "Config.json")
    self.max_len = self.config["max_len"]
    self.WORD = {int(k): v for k, v in self.config["WORD"].items()}
    self.BOS = self.config["BOS"]
    self.UNK = self.config["UNK"]
    self.PAD = self.config["PAD"]
    assert self.BOS == self.word2idx[self.WORD[self.BOS]]
    assert self.UNK == self.word2idx[self.WORD[self.UNK]]
    assert self.PAD == self.word2idx[self.WORD[self.PAD]]
def __init__(self, args_dict, set, w2i_tit, w2i, transform=None):
    """
    Args:
        set: 'train', 'val', or 'test'
        w2i_tit: word to index for titles
        w2i: word to index for comments
        transform: data transform
    """
    self.args_dict = args_dict
    self.set = set

    # Load Data
    if self.set == 'train':
        textfile = args_dict.csvtrain
        self.mismtch = 0.8
    elif self.set == 'val':
        textfile = args_dict.csvval
        self.mismtch = 0
    elif self.set == 'test':
        textfile = args_dict.csvtest
        self.mismtch = 0
    df = pd.read_csv(textfile, delimiter='\t')
    self.imageurls = list(df['IMAGE_FILE'])
    self.comment_map = get_mapped_text(df, w2i, field='DESCRIPTION')
    self.titles_map = get_mapped_text(df, w2i_tit, field='TITLE')

    # Parameters
    self.numpairs = len(df) / (1 - self.mismtch)
    self.comw2i = w2i
    self.titw2i = w2i_tit
    # self.titw2i = dict([(w, i) for i, w in enumerate(titvocab)])
    self.imagefolder = args_dict.dir_images
    self.transform = transform

    # tf-idf weights and vectors
    if os.path.exists(args_dict.dir_data + args_dict.tfidf_coms_file):
        self.tfidf_coms = load_obj(args_dict.dir_data + args_dict.tfidf_coms_file)
    else:
        self.tfidf_coms = self.get_tfidf(self.comment_map, self.comw2i)
        save_obj(self.tfidf_coms, args_dict.dir_data + args_dict.tfidf_coms_file)
    if os.path.exists(args_dict.dir_data + args_dict.tfidf_tits_file):
        self.tfidf_tits = load_obj(args_dict.dir_data + args_dict.tfidf_tits_file)
    else:
        self.tfidf_tits = self.get_tfidf(self.titles_map, self.titw2i)
        save_obj(self.tfidf_tits, args_dict.dir_data + args_dict.tfidf_tits_file)
def merge_posting_letter(saving_dir, prefix, files_num, inverted_idx):
    """
    Merges one posting file, identified by its prefix. (This task is dispatched to
    several processes so it runs in parallel.)
    It reads all the posting dicts and the entity candidate dicts with the relevant
    prefix and merges them into one, making sure that entities and capital letters
    are aligned with the way they were handled in the inverted index.
    :param saving_dir: where to save the output and find the temp files
    :param prefix: which posting prefix this task is applied to
    :param files_num: how many temp files to read
    :param inverted_idx: the inverted index of the corpus, containing all the final keys
    :return: the prefix this task worked on
    """
    # print("merging posting of prefix {}, files_num: {}".format(prefix, files_num))
    loading_dir = saving_dir + '/tmp'
    file_prefix = loading_dir + "/postingDict_" + prefix + "_"
    entities_prefix = loading_dir + "/entitiesDict_" + prefix + "_"
    merged_letter_posting = {}

    # Merge all the posting entries
    for i in range(files_num):
        try:
            current_letter_posting = utils.load_obj(file_prefix + str(i))
            for term, apperances in current_letter_posting.items():
                if term in merged_letter_posting.keys():  # already found term
                    merged_letter_posting[term] += apperances
                else:
                    if term in inverted_idx.keys():  # a valid capitalized term, or a lowercase one
                        merged_letter_posting[term] = apperances
                    else:  # a capitalized candidate that did not make it, so it is lowered
                        merged_letter_posting[term.lower()] = apperances
            # load the entities posting and merge it
            curent_entity_posting = utils.load_obj(entities_prefix + str(i))
            for term, apperances in curent_entity_posting.items():
                if term in inverted_idx.keys():  # valid entity
                    merged_letter_posting[term] = apperances
        except:
            pass

    # Sort every posting entry by its doc_id
    for postings_entry in merged_letter_posting.values():
        postings_entry.sort(key=lambda x: x[0])

    # Save the relevant posting dict
    utils.save_obj(merged_letter_posting, saving_dir + "/postingDict_" + prefix)
    # print("saved {} posting dict".format(prefix))
    return prefix
def load_tweet_dict():
    """
    Read the tweet vector files and insert the vectors into the tweet dictionary.
    :return: tweet dictionary including the GloVe vector data
    """
    tweet_dict = utils.load_obj("docDictionary")
    buckets = []
    for i in range(tweet_dict["metadata"]["tweet_vector_buckets"]):
        buckets.append(utils.load_obj("avgVector" + str(i)))
    for tweet_id in tweet_dict.keys():
        if tweet_id == "metadata":
            continue
        address = tweet_dict[tweet_id][5]
        tweet_dict[tweet_id][5] = buckets[address[0]][address[1]]
    return tweet_dict
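# Illustration (not part of the original module): a small sketch of the addressing
# scheme resolved by load_tweet_dict above. Each tweet entry stores a (bucket, offset)
# pair at position 5, which is replaced by the vector found at that position in the
# corresponding bucket file. The vectors below are invented.
def _demo_bucket_lookup():
    buckets = [
        [[0.1, 0.2], [0.3, 0.4]],  # contents of "avgVector0"
        [[0.5, 0.6]],              # contents of "avgVector1"
    ]
    tweet_entry = [None, None, None, None, None, (1, 0)]  # fields 0-4 omitted here
    address = tweet_entry[5]
    tweet_entry[5] = buckets[address[0]][address[1]]
    assert tweet_entry[5] == [0.5, 0.6]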
def load_blacklist(self):
    filename = self.blacklist_filename()
    if not os.path.exists(filename):
        blacklist = set()
    else:
        blacklist = utils.load_obj(filename)
    return blacklist
def load_index(self, fn):
    """
    Loads a pre-computed index (or indices) so we can answer queries.
    Input:
        fn - file name of the pickled index.
    """
    self.inverted_idx, self.documents = utils.load_obj(fn)
def reconstruct_from_postings(output_path, stemming):
    postings = glob(output_path + "\\{}\\*.pkl".format("WithStem" if stemming else "WithoutStem"),
                    recursive=True)
    reconstructed = set()
    corpus_size = 0
    total_length = 0
    for posting in postings:
        if "inverted_idx" not in posting:
            splited_path = os_path_splitext(posting)
            print(splited_path)
            file = utils.load_obj(splited_path[0])
            for doc_list in file.values():
                for doc in doc_list:
                    doc_id = doc[0]
                    doc_length = doc[4]
                    if doc_id not in reconstructed:
                        reconstructed.add(doc_id)
                        total_length += doc_length
                        corpus_size += 1
    return corpus_size, float(total_length) / corpus_size
def train(df, attrs, clf_class, clf_name, model_params, mode, magic_number, dates,
          dataset_name, trading_params):
    trade_freq = trading_params['trade_frequency']
    name = '%s-%s-attr%s-%s-%s-%s-%s-%s_' % (
        clf_name, dataset_name, len(attrs),
        dict_to_str(model_params).replace(' ', '_').replace(':', ''),
        mode, magic_number,
        pd.to_datetime(dates[0], format=DATE_FORMAT).date(),
        pd.to_datetime(dates[1], format=DATE_FORMAT).date())
    cached_file = os.path.join(CACHE_PATH + '/models/', name)

    start_date, final_date = dates
    idx = 0
    indices = sorted([
        day for day in list(set(df.index.values))
        if start_date <= day <= final_date
    ])

    print("Model and params: %s %s " % (clf_name, model_params))

    # magic number is by default 53: 52 weeks for training, 1 for prediction
    while idx + magic_number < len(indices) and indices[idx + magic_number] <= indices[-1]:
        if mode == CLASSIFICATION:
            train_x, train_y, test_x, test_y = \
                get_classification_data(clf_name, df, attrs, indices, idx, magic_number)
        elif mode == REGRESSION:
            # get regression datasets (target is float y -> ratio of increase)
            train_x, train_y, test_x, test_y = \
                get_regression_data(clf_name, df, attrs, indices, idx, magic_number)

        print("Training %s/%s with %s instances." %
              (idx // trade_freq, len(indices) // trade_freq, train_x.shape[0]))
        sys.stdout.flush()

        clf_cached_file = cached_file + str(indices[idx])[:10]
        if not CHECKPOINTING:
            clf = clf_class(**model_params).fit(train_x, train_y)
        else:
            try:
                clf = load_obj(clf_cached_file)
            except:
                clf = clf_class(**model_params).fit(train_x, train_y)
                save_obj(clf, clf_cached_file)

        pred = clf.predict(test_x)
        # import ipdb
        # ipdb.set_trace()
        df.loc[indices[idx + magic_number], clf_name] = pred
        idx += trade_freq

    df_trade = df.dropna(axis=0)
    print("Finished training for %s" % clf_name)
    return df_trade
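# Illustration (not part of the original module): a compact sketch of the walk-forward
# loop driven by magic_number in train above. Each iteration uses a window of
# magic_number consecutive dates and writes a prediction at indices[idx + magic_number],
# then the window slides forward by trade_freq. The numbers below are invented.
def _demo_walk_forward(indices, magic_number=3, trade_freq=1):
    windows = []
    idx = 0
    while idx + magic_number < len(indices):
        window = indices[idx:idx + magic_number]
        prediction_date = indices[idx + magic_number]
        windows.append((window, prediction_date))
        idx += trade_freq
    return windows

# _demo_walk_forward(list(range(6))) ->
# [([0, 1, 2], 3), ([1, 2, 3], 4), ([2, 3, 4], 5)]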
def set_default_ct(self):
    self.ct = load_obj("cell_tracker_with_lineage")
    self.ct.__init__()
    self.point_num = 1000
    self.ct.point_num = self.point_num
    self.ct.fidx_vec = np.array([fidx for fidx in self.ct.sample_idx_vec_dict.keys()])
def classify(k, text):
    target_vec = lda_all.get_document_topics(dictionary_all.doc2bow(utils.tokenize(text)),
                                             per_word_topics=True)[0]
    closest_points = []
    with open('./data/corpus-labels.csv') as labels:
        labelreader = csv.reader(labels)
        if not os.path.exists('./data/ldaspace-titles-abstracts.pkl'):
            print "data/ldaspace-titles-abstracts.pkl not found. Generating file (this may take a while)"
            save_pointcloud('./data/ldaspace-titles-abstracts')
        ldaspace = utils.load_obj('./data/ldaspace-titles-abstracts')
        for l, current_vec in zip(labelreader, ldaspace):
            dist = get_distance(current_vec, target_vec)
            if len(closest_points) >= k:
                if dist < closest_points[k - 1][1]:
                    closest_points.pop(k - 1)
                    closest_points.append((l, dist))
            else:
                closest_points.append((l, dist))
            closest_points.sort(key=lambda point: point[1])
    category_counter = Counter()
    for x in closest_points:
        category_counter.update(x[0])
    return category_counter
def test(args):
    # see if we already ran this experiment
    code_root = os.path.dirname(os.path.realpath(__file__))
    exp_dir = utils.get_path_from_args(args) if not args.output_dir else args.output_dir
    path = "{}/results/{}".format(code_root, exp_dir)
    assert os.path.isdir(path)

    task_family_test = tasks_sine.RegressionTasksSinusoidal("test", args.skew_task_distribution)
    best_valid_model = utils.load_obj(os.path.join(path, "logs")).best_valid_model

    k_shots = [5, 10, 20, 40]
    df = []
    for k_shot in k_shots:
        losses = np.array(
            eval(
                args,
                copy.copy(best_valid_model),
                task_family=task_family_test,
                num_updates=10,
                lr_inner=0.01,
                n_tasks=1000,
                k_shot=k_shot,
            ))
        for grad_step, task_losses in enumerate(losses.T, 1):
            new_rows = [[k_shot, grad_step, tl] for tl in task_losses]
            df.extend(new_rows)

    df = pd.DataFrame(df, columns=["k_shot", "grad_steps", "loss"])
    df.to_pickle(os.path.join(path, "res.pkl"))
    utils.plot_df(df, path)
def __init__(self, model_variables, variables, database):
    self.MV = model_variables
    self.Vars = variables
    self.DB = database
    self.model = None
    self.weight_save_path = "saved_weights"
    self.outputs_path = "outputs"
    self.model_name = self.Vars["name"]
    if self.Vars["class"] == "age":
        self.model_class = "age"
        self.class_count = self.DB.age_class_count
        self.class_labels = self.DB.age_labels
        self.db_train_path = self.DB.db_age_train_folder_path
        self.db_test_path = self.DB.db_age_test_folder_path
        self.mean_image = myutils.load_image(self.DB.age_mean_image_path)
    elif self.Vars["class"] == "sex":
        self.model_class = "sex"
        self.class_count = self.DB.sex_class_count
        self.class_labels = self.DB.sex_labels
        self.db_train_path = self.DB.db_sex_train_folder_path
        self.db_test_path = self.DB.db_sex_test_folder_path
        self.mean_image = myutils.load_image(self.DB.sex_mean_image_path)
    self.class_weights = myutils.load_obj(self.DB.db_new_path + "/" + self.model_class)
def show_examples(args):
    # Load reason instances and embeddings
    fileembds = os.path.join(args.data_dir, args.embsfile)
    embeddings = utils.load_obj(fileembds)
    allreasons = read_data(args)
    allinds = list(range(len(allreasons)))
    assert len(allreasons) == embeddings.shape[0]
    numReasons = len(allreasons)

    # Get a random instance, compute scores and show the most similar ones
    thisidx = random.sample(allinds, 1)[0]
    thisreason = allreasons[thisidx]
    print('-' * 25)
    print("REASON: {}".format(thisidx))
    print(thisreason)
    print('.')

    # Compute scores and sort
    allscores = sklearn.metrics.pairwise.cosine_similarity(embeddings)
    thisscores = allscores[thisidx, :]
    ranking = np.argsort(thisscores)[::-1].tolist()
    sortedscores = np.sort(thisscores)[::-1].tolist()

    # show the top matches
    numshow = 10
    print("MATCHES")
    for k in list(range(numshow)):
        kidx = ranking[k]
        score = sortedscores[k]
        reason = allreasons[kidx]
        print("sample %d, score %.03f: %s" % (kidx, score, reason))
    return allscores
def load_dicts(self, variant):
    filename = self.cache_filename(variant)
    if not os.path.exists(filename):
        cache = self.default_cache()
    else:
        cache = utils.load_obj(filename)
    return cache
def main(args):
    if len(args) != 2:
        print "Usage: mds.py clustering.pkl"
        sys.exit(0)
    path = args[1]

    print "Loading"
    clusters = clustering = utils.load_obj(path)
    # map(lambda c: c.set_label(), clustering)
    for i in [5]:
        clusters = reclusterWithOPTICS(clusters, i)

    _docs = reduce(lambda x, y: x + y, map(lambda c: c.members, clusters))
    confirm = BaseCONFIRM(_docs)
    confirm.clusters = clusters

    print "Original Number of Clusters:", len(clustering)
    print "Final Number of Clusters:", len(clusters)

    '''print reps
    imgs = []
    for idx in reps:
        if idx == 0:
            imgs.append(clustering[i].center)
        else:
            idx = idx - 1
            imgs.append(clustering[i].members[idx])
    display(imgs)'''

    #print len(selectWithHac(clustering))
    #print streamSelector(clustering)
    #print entropy(clustering)
    #print "Analyzing"
    analyzer = metric.KnownClusterAnalyzer(confirm)
    analyzer.print_all()
    print "User Queries:", QueryCount
def get(self, filename):
    with self._disk_lock:
        if type(filename) == types.UnicodeType:
            filename = filename.encode('utf-8')
        data = super(LifoCache, self).get(filename)
        if not data:
            self.disk_read_count += 1
            if filename in self.disk_cache:
                if os.path.exists(self.disk_cache_dir + filename):
                    self.disk_read_hit += 1
                    data = utils.load_obj(self.disk_cache_dir + filename)
                del self.disk_cache[filename]
                if data:
                    self.disk_cache[filename] = True
                    super(LifoCache, self).set(filename, data)
        return data
def main(args):
    if len(args) != 3:
        print "Usage: clusterFrame.py C clustering.pkl"
        print "       C is the cluster in clustering.pkl to display"
        sys.exit(0)
    C = int(args[1])
    path = args[2]
    clustering = utils.load_obj(path)

    root = Tk()
    frame = ClusterFrame(root, clustering[C])
    frame.grid()
    root.mainloop()
def __load_imagenet_weights(self):
    variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    try:
        print("Loading ImageNet pretrained weights...")
        dict = load_obj(self.args.pretrained_path)
        run_list = []
        for variable in variables:
            for key, value in dict.items():
                # Adding ':' means that we are interested in the variable itself and not
                # the variable parameters that are used in adaptive optimizers
                if key + ":" in variable.name:
                    run_list.append(tf.assign(variable, value))
        self.sess.run(run_list)
        print("Weights loaded\n\n")
    except KeyboardInterrupt:
        print("No pretrained ImageNet weights exist. Skipping...\n\n")
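# Illustration (not part of the original module): the name-matching rule used in
# __load_imagenet_weights above, shown without TensorFlow. A checkpoint key matches a
# graph variable when "key:" is a substring of the variable name, which skips optimizer
# slot variables such as "conv1/weights/Adam:0". The names below are invented.
def _demo_match(var_names, ckpt_keys):
    return {v: k for v in var_names for k in ckpt_keys if k + ":" in v}

assert _demo_match(["conv1/weights:0", "conv1/weights/Adam:0"],
                   ["conv1/weights"]) == {"conv1/weights:0": "conv1/weights"}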
def main(args):
    if len(args) != 3:
        print "Usage: clusterFrame.py C clustering.pkl"
        print "       C is the cluster in clustering.pkl to display"
        sys.exit(0)
    C = int(args[1])
    path = args[2]

    print "Loading"
    clustering = utils.load_obj(path)
    #clustering = doc.get_docs_nested(driver.get_data_dir("very_small"))
    hierarchy = Hierarchy.createHierarchy(clustering)

    print "Starting GUI"
    root = Tk()
    frame = GraphFrame(root, hierarchy)
    frame.pack(fill=BOTH, expand=1)
    root.mainloop()
def _load(self, filename):
    # no need of a lock here
    items = utils.load_obj(filename)
    for d in items:
        self.put(*d)
def default_jobs():
    return {
        "match_queue": job_queue.JobQueue(),
        "split_queue": job_queue.JobQueue(),
        "number_of_match_job": 0,
        "number_of_split_job": 0,
    }


if __name__ == "__main__":
    try:
        cache_dir = "match_and_split_text_layer"
        if not os.path.exists(os.path.expanduser("~/cache/" + cache_dir)):
            os.mkdir(os.path.expanduser("~/cache/" + cache_dir))
        # qdel send a SIGUSR2 if -notify is used when starting the job.
        # signal.signal(signal.SIGUSR2, on_exit)
        try:
            jobs = utils.load_obj("wsdaemon.jobs")
        except:
            jobs = default_jobs()
        thread.start_new_thread(job_thread, (jobs["match_queue"], do_match))
        thread.start_new_thread(job_thread, (jobs["split_queue"], do_split))
        bot_listening()
    except KeyboardInterrupt:
        pywikibot.stopme()
        os._exit(1)
    finally:
        pywikibot.stopme()
def __init__(self, filename):
    self.base_path = "/".join(filename.split('/')[:-1])
    self.index = utils.load_obj(filename + '.index')
    self.fd_data = open(filename)
def load_thrift_app(self):
    return utils.load_obj(self.app_uri)
def load(self):
    self.chdir()
    self.tfactory = utils.load_obj(self.cfg.thrift_transport_factory)()
    self.pfactory = utils.load_obj(self.cfg.thrift_protocol_factory)()
    self.thrift_app = self.load_thrift_app()
    return lambda: 1