Example #1
def read_all_reports(report_file, trace_folder, process_num):
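    # Read every trace file in trace_folder in parallel with a worker pool,
    # then split the reports from report_file into the 'm' and 'b' categories.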
    file_list = os.listdir(trace_folder)
    file_num = len(file_list)
    trace_collection = []
    pool = Pool(process_num)
    for file_no in range(file_num):
        pool.apply_async(read_single_trace,
                         args=(trace_folder, file_list[file_no], file_no),
                         callback=trace_collection.append)
    pool.close()
    pool.join()
    print('Finish reading all the traces')
    trace_dict = {}
    for item in trace_collection:
        trace_dict[item[0]] = item[1]
    # read reports
    reports = utils.read_pkl(report_file)
    # split reports
    report_dict = {
        'm': [], 'b': []
    }
    for item in reports:
        report_dict[item[1]].append(item[0])
    print('Finish splitting the reports into two categories!')
    return trace_dict, report_dict
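Every example on this page goes through a read_pkl helper (and Example #3 also uses write_pkl) from each project's own utils module; the exact signatures vary per project, but a minimal sketch, assuming they are thin wrappers around the standard pickle module, looks like this:

import pickle


def read_pkl(path):
    # Load and return the object stored in the pickle file at `path`.
    with open(path, 'rb') as f:
        return pickle.load(f)


def write_pkl(path, obj):
    # Serialize `obj` to the pickle file at `path`.
    with open(path, 'wb') as f:
        pickle.dump(obj, f)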
Example #2
def read_single_trace(folder_path, file_name, file_no):
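    # Load one trace (an .npz archive when NPZTag is set, otherwise a pickle)
    # and return its hash together with its unique instructions.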
    if file_no % 100 == 0:
        print('Reading %d_th trace' % file_no)
    file_path = os.path.join(folder_path, file_name)
    if NPZTag:
        tmp = np.load(file_path)
        content = tmp['trace']
        trace_hash = file_name.split('.')[0]
    else:
        content = utils.read_pkl(file_path)
        trace_hash = file_name
    unique_insns = np.unique(np.asarray(content))
    temp = [trace_hash, unique_insns]
    return temp
Example #3
def main(period, time_limit, max_vids, min_score, no_download):
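    # Fetch (or read cached) video info, write the compilation metadata,
    # optionally download the videos, and build the final compilation.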
    compiler = Compiler(priv_utils.get_yt(), priv_utils.get_reddit(), MAIN_DIR)

    # for debugging
    fetch = False
    write = True
    vid_pkl_path = MAIN_DIR + '/src/vid_info.pkl'

    # vid info will either be fetched or read from file
    if fetch:
        print("fetching")  # TODO: remove
        vid_info, total_duration = compiler.fetch_vid_info(
            period=period,
            time_limit=time_limit,
            max_vids=max_vids,
            min_score=min_score)

        # only write if new values have been fetched
        if write:
            print("writing")  # TODO: remove
            utils.write_pkl(vid_pkl_path, (vid_info, total_duration))
    else:
        print("reading")  # TODO: remove
        #vid_info, total_duration = utils.read_vid_info(vid_pkl_path)
        vid_info, total_duration = utils.read_pkl(vid_pkl_path)

    print("total duration: " + str(total_duration) + "\n")
    for i in range(len(vid_info)):
        vid_info[i].print_info()

    # generate name of compilation for file naming
    comp_name = compiler.comp_name_gen(period, max_vids)
    # create a directory for files related to compilation
    os.system("mkdir " + MAIN_DIR + "/final/" + comp_name)

    # shuffle vid order so that worst vids aren't always last
    random.shuffle(vid_info)

    description = compiler.gen_description(vid_info)
    compiler.write_description(comp_name, description)
    compiler.write_tags(comp_name)
    compiler.write_title(comp_name, period, max_vids)

    if not no_download:
        compiler.download_vids(vid_info)

    compiler.create_compilation(comp_name, vid_info)
Example #4
def process_poc_trace(poc_trace_path, bin_path, target_src_str):
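    # Load the PoC trace and, when target_src_str is non-empty, cut it off at
    # the last instruction that maps to that source location.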
    if NPZTag:
        tmp = np.load(poc_trace_path)
        poc_trace = tmp['trace']
    else:
        poc_trace = utils.read_pkl(poc_trace_path)
    poc_trace = np.asarray(poc_trace)
    if len(target_src_str) == 0:
        return poc_trace
    else:
        insn_list = parse_dwarf.get_bin_line(bin_path, target_src_str)
        insn_idx_list = []
        for insn in insn_list:
            insn_idx_list += list(np.where(poc_trace == insn)[0])
        if len(insn_idx_list) == 0:
            raise Exception("ERROR: Cannot find the instructions for source -> %s" % target_src_str)
        max_id = max(insn_idx_list)
        return poc_trace[:max_id+1]
Example #5
    parser.add_argument('--qid_list', required=False, default=None)

    args = parser.parse_args()

    os.makedirs(args.output_path, exist_ok=True)
    logging.basicConfig(format='%(levelname)s :: %(asctime)s - %(message)s',
                        level=args.log_level,
                        datefmt='%d/%m/%Y %I:%M:%S %p',
                        filename=os.path.join(args.output_path,
                                              "GET_TURK_DATA_LOG"),
                        filemode='w')

    data_root = os.path.join(args.data_repo_root, args.dataset)

    distmult_dump = utils.read_pkl(args.model_weights)
    logging.info("Read Model Dump")

    data = utils.read_data(args.test_file)
    qids = set(range(len(data)))
    if args.qid_list is not None:
        qids = set(np.loadtxt(args.qid_list, dtype=int).ravel().tolist())
        assert max(qids) < len(data)
    #

    mapped_data = np.array(
        utils.map_data(data, distmult_dump['entity_to_id'],
                       distmult_dump['relation_to_id'])).astype(np.int32)
    logging.info("Loaded test file from %s" % (args.test_file))

    if (args.template_pred is None and args.rule_pred is None):
Example #6
op.add_option('--nw', dest='nb_work', default=4, type='int', help='number of worker threads for data loading')
op.add_option('--pr', '--path_result', dest='path_result', default='./result.txt',
              type='str', help='path to save the prediction results')
argv = [] if is_interactive() else sys.argv[1:]
(opts, args) = op.parse_args(argv)

# Initialize data parameters
root_idx = opts.root_idx
path_num = os.path.join(root_idx, 'instance.csv')
max_len = opts.max_len
root_voc = opts.root_voc
word2id_dict = read_pkl(os.path.join(root_voc, 'word2id.pkl'))
label2id_dict = read_pkl(os.path.join(root_voc, 'label2id.pkl'))
has_label = False
batch_size = opts.batch_size
use_cuda = opts.cuda
num_worker = opts.nb_work
path_result = opts.path_result

t0 = time()

# Initialize the dataset
dataset = SentenceDataUtil(path_num,
                           root_idx,
                           max_len,
                           word2id_dict,
                           has_label,
Example #7
              action='store_true',
              default=False,
              help='whether to use GPU acceleration')
op.add_option('--nw', dest='nb_work', default=4, type='int', help='number of worker threads for data loading')
argv = [] if is_interactive() else sys.argv[1:]
(opts, args) = op.parse_args(argv)
if not opts.nb_class:
    op.print_help()
    exit()

# Initialize data parameters
root_idx = opts.root_idx
path_num = os.path.join(root_idx, 'instance.csv')
max_len = opts.max_len
root_voc = opts.root_voc
word2id_dict = read_pkl(os.path.join(root_voc, 'word2id.pkl'))
label2id_dict = read_pkl(os.path.join(root_voc, 'label2id.pkl'))
has_label = True
dev_size = opts.dev_size
batch_size = opts.batch_size
num_worker = opts.nb_work

# Initialize the dataset
dataset = SentenceDataUtil(path_num,
                           root_idx,
                           max_len,
                           word2id_dict,
                           has_label,
                           label2id_dict,
                           shuffle=True)
dataset_train, dataset_dev = dataset.split_train_and_dev(dev_size=dev_size)
Example #8
import argparse
import logging
import os
import pickle
import string
import time
import numpy as np
import pandas as pd
import utils

data_repo_root = "../data/fb15k/"
model_weights = "dumps/fb15k237_distmult_dump_norm.pkl"
wiki_file = os.path.join(data_repo_root, "mid2wikipedia.tsv")
orig_file = os.path.join(data_repo_root, "entity_mid_name_type_typeid.txt")
intersection_file = os.path.join(data_repo_root, "mid2wikipedia_cleaned.tsv")

distmult_dump = utils.read_pkl(model_weights)


def read_data(path):
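    # Map each MID (first column of the tab-separated file) to its entity
    # name and Wikipedia URL.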
    mapping_name = {}
    mapping_url = {}
    with open(path, "r") as f:
        for line in f:
            line_arr = line.split("\t")
            mapping_name[line_arr[0]] = line_arr[1]
            mapping_url[line_arr[0]] = line_arr[2]
    return mapping_name, mapping_url


mapping_name, mapping_url = read_data(wiki_file)
Example #9
    parser.add_argument('--qid_list', required=False, default=None)

    args = parser.parse_args()

    os.makedirs(args.output_path, exist_ok=True)
    logging.basicConfig(format='%(levelname)s :: %(asctime)s - %(message)s',
                        level=args.log_level,
                        datefmt='%d/%m/%Y %I:%M:%S %p',
                        filename=os.path.join(args.output_path,
                                              "GET_TURK_DATA_LOG"),
                        filemode='w')

    data_root = os.path.join(args.data_repo_root, args.dataset)

    distmult_dump = utils.read_pkl(args.model_weights)
    logging.info("Read Model Dump")

    data = utils.read_data(args.test_file)
    qids = set(range(len(data)))
    if args.qid_list is not None:
        qids = set(np.loadtxt(args.qid_list, dtype=int).ravel().tolist())
        assert max(qids) < len(data)
    #

    mapped_data = np.array(
        utils.map_data(data, distmult_dump['entity_to_id'],
                       distmult_dump['relation_to_id'])).astype(np.int32)
    logging.info("Loaded test file from %s" % (args.test_file))

    # if(args.template_pred is None and args.rule_pred is None):
Example #10
import os
import numpy as np
import cv2
import torch
from config import *

make_dir(reconstruction_path)
make_dir(generate_samples_path)
make_dir(mean_var_path)


def gen_data(mean, cov, num):
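    # Draw num samples from a multivariate normal with the given mean and
    # covariance, rounded to four decimals.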
    mean = mean.cpu()
    data = np.random.multivariate_normal(mean, cov, num)
    return np.round(data, 4)


map_dict = read_pkl()
mean = map_dict[0].float()

num = 40
net1 = torch.load(
    os.path.join(model_path, 'encoder_sigma_' + str(num) + '.pth'))
net2 = torch.load(
    os.path.join(model_path, 'decoder_sigma_' + str(num) + '.pth'))

for i in net1.parameters():
    i.requires_grad = False

for i in net2.parameters():
    i.requires_grad = False

net1 = net1.cuda()
Example #11
    def process_data(self, meta_file, feat_file, phn_file, wrd_file, slb_file):
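        # Read the metadata, frame features, and phone/word alignments
        # (optionally truncated to num_all_utts utterances), then build the
        # word-level acoustic and text features used for batching.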
        if self.num_all_utts == -1:
            meta_data = read_pkl(meta_file)['prefix']  # {'prefix': [num_of_utts x drID_spkID_uttID]}
            feat = read_pkl(feat_file)     # [num_of_utts x num_of_frames x feat_dim]
            phn_data = read_pkl(phn_file)  # [num_of_utts x num_of_phns x [phn, start, end]] ** include 'h#' **
            wrd_data = read_pkl(wrd_file)  # [num_of_utts x num_of_wrds x [wrd, start, end]]
            # slb_data = read_pkl(slb_file)  # [num_of_utts x num_of_slbs x [slb, start, end]]
        else:
            meta_data = read_pkl(meta_file)['prefix'][:self.num_all_utts]
            feat = read_pkl(feat_file)[:self.num_all_utts]
            phn_data = read_pkl(phn_file)[:self.num_all_utts]
            wrd_data = read_pkl(wrd_file)[:self.num_all_utts]
            # slb_data = read_pkl(slb_file)[:self.num_all_utts]

        phn_wrd_data = self.make_phn_wrd(phn_data, wrd_data)

        self.n_utts = len(feat)
        print("Read %s utterances" % self.n_utts)

        for i, phn_utt in enumerate(phn_data):
            # Process each phn in utt
            phn_meta_utt = []
            for j, (phn, phn_start, phn_end) in enumerate(phn_utt):
                self.n_total_phns += 1
                self.phn2cnt[phn] += 1
                phn_meta_utt.append((phn, i, phn_start, phn_end))
            self.phn_meta.append(phn_meta_utt)

        for i, (utt, feat_utt, wrd_utt, phn_wrd_utt) \
                in enumerate(zip(meta_data, feat, wrd_data, phn_wrd_data)):
            spk = utt.split('_')[1]

            if spk not in self.spk2utt_idx:
                self.spks.append(spk)
                self.spk2utt_idx[spk] = []
                self.spk2wrd_idx[spk] = []
            self.spk2utt_idx[spk].append(i)
            self.utt_idx2spk[i] = spk

            # Process each wrd in utt
            wrd_meta_utt = []
            phn_wrd_meta_utt = []
            for j, ((wrd, wrd_start, wrd_end),
                    phn_wrd) in enumerate(zip(wrd_utt, phn_wrd_utt)):
                wrd = wrd.lower()
                if (j != 0 and wrd_start >= wrd_utt[j - 1][1]
                        and wrd_end <= wrd_utt[j - 1][2]):
                    # print ('Words overlap:')
                    # print (i, j-1, j, wrd_utt[j-1], wrd_utt[j])
                    continue
                if (j != len(wrd_utt) - 1 and wrd_start >= wrd_utt[j + 1][1]
                        and wrd_end <= wrd_utt[j + 1][2]):
                    # print ('Words overlap:')
                    # print (i, j, j+1, wrd_utt[j], wrd_utt[j+1])
                    continue
                if wrd_end - wrd_start == 0:
                    # print (i, wrd)
                    continue
                if not len(phn_wrd):
                    # print (i, j)
                    continue
                self.n_total_wrds += 1
                if wrd not in self.wrd2idx:
                    self.wrd2idx[wrd] = self.n_wrds
                    self.idx2wrd[self.n_wrds] = wrd
                    self.n_wrds += 1
                self.wrd2cnt[wrd] += 1
                wrd_meta_utt.append((wrd, i, wrd_start, wrd_end))

                if (wrd, phn_wrd) not in self.phn_wrd2idx:
                    self.phn_wrd2idx[(wrd, phn_wrd)] = self.n_phn_wrds
                    self.idx2phn_wrd[self.n_phn_wrds] = (wrd, phn_wrd)
                    self.n_phn_wrds += 1
                self.phn_wrd2cnt[(wrd, phn_wrd)] += 1
                phn_wrd_meta_utt.append(
                    ((wrd, phn_wrd), i, wrd_start, wrd_end))

                self.spk2wrd_idx[spk].append(self.n_total_wrds - 1)
                self.wrd_idx2spk[self.n_total_wrds - 1] = spk
                self.feat.append(feat[i][wrd_start:wrd_end])

                phn_idx_array = np.array(
                    [self.phn2idx[phn] - 1 for phn in phn_wrd])
                self.phn_idx_arrays.append(phn_idx_array)
                self.txt_feat.append(
                    self.make_one_hot_feat(phn_idx_array, self.n_phns - 1))

                char_idx_array = np.array(
                    [self.char2idx[char] for char in wrd])
                self.char_idx_arrays.append(char_idx_array)
                self.txt_feat_char.append(
                    self.make_one_hot_feat(char_idx_array, self.n_chars))

            self.wrd_meta.append(wrd_meta_utt)
            self.phn_wrd_meta.append(phn_wrd_meta_utt)

            # Process each slb in utt
            # slb_meta_utt = []
            # for j, (slb, slb_start, slb_end) in enumerate(slb_utt):
            # if slb_start >= slb_utt[j-1][1] and slb_end <= slb_utt[j-1][2]:
            # # print ('Syllables overlap:')
            # # print (i, j-1, j, slb_utt[j-1], slb_utt[j])
            # continue

            # self.n_total_slbs += 1
            # if not slb in self.slb2idx:
            # self.slb2idx[slb] = self.n_slbs
            # self.idx2slb[self.n_slbs] = slb
            # self.n_slbs += 1
            # self.slb2cnt[slb] += 1

            # # slb_meta_utt update
            # slb_meta_utt.append((slb, i, slb_start, slb_end))

            # self.slb_meta.append(slb_meta_utt)

        self.feat = np.array(self.feat)
        self.txt_feat = np.array(self.txt_feat)
        self.txt_feat_char = np.array(self.txt_feat_char)
        self.n_batches = len(self.feat) // self.batch_size
        if len(self.feat) % self.batch_size != 0:
            self.n_batches += 1

        for wrd_meta_utt in self.wrd_meta:
            self.wrds.extend([w[0] for w in wrd_meta_utt])
        for phn_wrd_meta_utt in self.phn_wrd_meta:
            self.phn_wrds.extend([w[0] for w in phn_wrd_meta_utt])

        print('Num of total words: ', len(self.feat), len(self.txt_feat),
              len(self.txt_feat_char), self.n_total_wrds)
        print('Num of distinct words (with different phonemes): ',
              self.n_phn_wrds)
        print('Num of distinct words: ', self.n_wrds)
        print('Num of batches: ', self.n_batches)

        return