Python name2dic Examples

Programming Language: Python

Namespace/Package Name: utils

Method/Function: name2dic

Examples at hotexamples.com: 3

Python name2dic - 3 examples found. These are the top-rated real-world Python examples of utils.name2dic, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: topic_features_LDA.py Project: michaelmior/sato

def get_table_topic(df, lda, common_dict, model_name):
    """Return the topic weights of a whole table.

    Every column of ``df`` is run through ``train_LDA.process_col`` using
    the options that ``name2dic`` decodes from ``model_name``; the
    processed columns are concatenated into one sequence, converted to a
    bag-of-words with ``common_dict.doc2bow`` and looked up in ``lda``.

    Returns a list holding the second element of each entry produced by
    ``lda[...]`` (presumably the topic probability of a
    ``(topic_id, probability)`` pair -- confirm against the LDA model used).
    """
    process_options = name2dic(model_name)

    # Flatten the processed columns, in column order, into one document.
    table_tokens = [
        token
        for column_name in df.columns
        for token in train_LDA.process_col(df[column_name], **process_options)
    ]

    bag_of_words = common_dict.doc2bow(table_tokens)
    topic_entries = lda[bag_of_words]
    return [entry[1] for entry in topic_entries]

Example #2

Show file

File: feature_importance.py Project: yikeqicn/sato

    batch_size = args.batch_size
    corpus_list = args.corpus_list

    MAX_COL_COUNT = args.MAX_COL_COUNT if args.model_type == 'CRF' else None

    seed_list = [1001, 1002, 1003, 1004, 1005]
    ####################
    # Preparations
    ####################
    valid_types = get_valid_types(TYPENAME)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("PyTorch device={}".format(device))

    if topic_name:
        topic_dim = int(name2dic(topic_name)['tn'])
    else:
        topic_dim = None

    if args.topic is not None:
        feature_group_list = args.sherlock_feature_groups + ['topic']
    else:
        feature_group_list = args.sherlock_feature_groups

    # 1. Dataset
    t1 = time()
    print("Creating Dataset object...")
    label_enc = LabelEncoder()
    label_enc.fit(valid_types)

    # load data through table instance

Example #3

Show file

    def __init__(self,
                 corpus,
                 sherlock_features: List[str] = None,
                 topic_feature: str = None,
                 label_enc: LabelEncoder = None,
                 id_filter: List[str] = None,
                 max_col_count: int = None):
        """Load the header table and the per-table feature dictionaries.

        For every requested feature group (and, optionally, the topic
        feature) a dictionary of ``torch.FloatTensor`` values is stored in
        ``self.data_dic``. Each dictionary is read from a pickle cache in
        ``tmp_path`` when that file exists; otherwise it is built from the
        feature dataframe and written to the cache for later runs.

        Args:
            corpus: Corpus name; used to build the header, feature and
                cache file names.
            sherlock_features: Names of the Sherlock feature groups to load.
                Each name is used as a key into ``feature_group_cols``.
                ``None`` or an empty list loads no Sherlock features.
            topic_feature: Name of the topic feature. It is decoded with
                ``name2dic`` and its ``'tn'`` entry is read as the number
                of topics. ``None`` skips topic loading.
            label_enc: Label encoder; only stored on the instance here.
            id_filter: Row labels used to restrict ``self.df_header`` (via
                ``.loc``). Filtering does not affect the pickled files used
                or the dictionaries loaded.
            max_col_count: Only stored on the instance here. Per the
                original note: if not None, the returned tensors are padded
                to ``max_col_count`` columns (done outside this method --
                TODO confirm).
        """
        self.sherlock_features = sherlock_features  # list of sherlock features
        self.topic_feature = topic_feature  # name of topic_feature
        self.label_enc = label_enc
        self.max_col_count = max_col_count

        self.df_header = load_tmp_df(header_path, tmp_path, '{}_{}_header_valid'.format(corpus, TYPENAME), table=True)

        # filter training/testing sets
        # filtering won't affect the pickled file used or the dictionary loaded
        if id_filter is not None:
            self.df_header = self.df_header.loc[id_filter]

        self.data_dic = {}

        start = time.time()

        # The full Sherlock dataframe is loaded lazily, at most once, and
        # only if at least one feature group has no cache file yet.
        sherlock_loaded = False

        # Guard against the default ``None``: calling len(None) would raise
        # TypeError before any feature could be loaded.
        if sherlock_features is not None and len(sherlock_features) > 0:

            for f_g in sherlock_features:

                dic_pkl_file = join(tmp_path, '{}_{}_{}.pkl'.format(corpus, TYPENAME, f_g))
                if os.path.exists(dic_pkl_file):
                    # NOTE(review): pickle.load executes arbitrary code from
                    # the file; tmp_path must only hold trusted caches.
                    with open(dic_pkl_file, "rb") as fin:
                        self.data_dic[f_g] = pickle.load(fin)
                else:
                    if not sherlock_loaded:
                        self.df_sherlock = load_tmp_df(feature_path, tmp_path, '{}_{}_sherlock_features'.format(corpus, TYPENAME), table=False)
                        sherlock_loaded = True

                    print("Prepare seperate files for feature groups...")
                    # One float array per table_id, restricted to this
                    # group's columns, with NaNs replaced by EMBEDDING_FILL.
                    feat_arrays = self.df_sherlock.groupby('table_id').apply(lambda x: x[feature_group_cols[f_g]].fillna(EMBEDDING_FILL).values.astype('float')).to_dict()
                    feat_dict = {
                        table_id: torch.FloatTensor(values)
                        for table_id, values in feat_arrays.items()
                    }
                    self.data_dic[f_g] = feat_dict

                    with open(dic_pkl_file, "wb") as fout:
                        pickle.dump(feat_dict, fout, protocol=2)

        if topic_feature is not None:
            self.topic_no = int(name2dic(self.topic_feature)['tn'])

            dic_pkl_file = join(tmp_path, '{}_{}_{}.pkl'.format(corpus, TYPENAME, topic_feature))
            if os.path.exists(dic_pkl_file):
                # NOTE(review): same trust requirement as the caches above.
                with open(dic_pkl_file, "rb") as fin:
                    self.data_dic['topic'] = pickle.load(fin)

            else:

                # Topic vectors shorter than topic_no are right-padded with
                # the uniform value 1/topic_no (nothing is padded on the left).
                fill = 1.0 / self.topic_no
                pad_vec = lambda x: np.pad(x, (0, self.topic_no - len(x)),
                                    'constant',
                                    constant_values=(0.0, fill))

                self.df_topic = load_tmp_df(feature_path, tmp_path, '{}_{}_topic-{}_features'.format(corpus, TYPENAME, topic_feature), table=True)
                # NOTE(review): eval() runs whatever expression is stored in
                # the 'table_topic' column. If the column only ever holds
                # list literals, ast.literal_eval would be a safer parser --
                # confirm the stored format before switching.
                topic_arrays = self.df_topic.apply(lambda x: pad_vec(eval(x['table_topic'])), axis=1).to_dict()

                topic_dict = {
                    table_id: torch.FloatTensor(values)
                    for table_id, values in topic_arrays.items()
                }

                self.data_dic['topic'] = topic_dict
                with open(dic_pkl_file, "wb") as fout:
                    pickle.dump(topic_dict, fout, protocol=2)

        print("Total data preparation time:", time.time() - start)