コード例 #1
0
 def save_doc_covid(self):
     """Flush the in-memory COVID document postings and start a new chunk.

     The current postings are handed to ``utils.save_dict`` under the name
     ``doc_posting_covid<counter>`` together with the output path taken from
     the config.  Afterwards the counter is advanced and the in-memory
     postings are replaced by an empty dict.
     """
     posting_name = "doc_posting_covid" + str(self.doc_posting_covid_counter)
     utils.save_dict(self.document_posting_covid, posting_name,
                     self.config.get_out_path())
     # Begin a fresh chunk so the next flush is written under a new name.
     self.document_posting_covid = {}
     self.doc_posting_covid_counter += 1
コード例 #2
0
def seg_stats(path):
    """Accumulate segmentation statistics for every case folder under ``path``.

    For each entry ``<folder>`` in ``path`` the file
    ``<path>/<folder>/<folder>_seg.nii.gz`` is passed to
    ``explore.process_all`` together with the running summary dictionary.
    The final summary is handed to ``utils.save_dict``.

    Returns:
        Always ``0``.
    """
    summary_keys = ("wt_seg_size", "tc_seg_size", "et_seg_size", "image_size")
    sumary_dict = {key: [] for key in summary_keys}

    # First pass: list the expected segmentation file of every case folder.
    file_list = []
    for folder in os.listdir(path):
        print(folder)
        file_list.append(os.path.join(path, folder, folder + "_seg.nii.gz"))

    # Second pass: fold each file into the summary, reporting progress.
    for position, seg_file in enumerate(file_list, start=1):
        print("Doing :", position, "/", len(file_list))
        sumary_dict = explore.process_all(seg_file, sumary_dict)

    utils.save_dict(sumary_dict)

    print("END")
    return 0
コード例 #3
0
ファイル: hoi_test.py プロジェクト: aag147/rcnn
def saveEvalResults(generator, cfg, obj_mapping, hoi_mapping, evalData=None):
    """Compute the HOI mAP for the generator's data split and save a summary.

    When ``evalData`` is None, the per-image results are collected from the
    ``<imageID>.pkl`` files found in the split's result directory; images
    without such a file are counted as empty.  The values returned by
    ``metrics.computeHOImAP`` and the empty count are saved with
    ``utils.save_dict`` and printed.
    """
    mode = generator.data_type
    my_output_path = (cfg.results_path + 'hoi' + cfg.my_results_dir + '/res/'
                      + mode + generator.approach + '/')

    path = cfg.part_results_path + cfg.dataset + "/hoi" + cfg.my_results_dir
    # NOTE(review): when the directory does not exist, the last character of
    # the path is dropped before '/' is appended - confirm this is intended.
    path = (path if os.path.exists(path) else path[:-1]) + '/'

    nb_empty = 0
    if evalData is None:
        evalData = []
        for batchidx, (imageID, _) in enumerate(generator.imagesMeta.items()):
            step = batchidx + 1
            on_report_step = step % max(1, generator.nb_batches // 100) == 0
            if on_report_step or batchidx == 1 or step == generator.nb_batches:
                utils.update_progress_new(step, generator.nb_batches, imageID)

            result_stem = my_output_path + imageID
            if not os.path.exists(result_stem + '.pkl'):
                nb_empty += 1
                continue

            data = utils.load_obj(result_stem)
            if data is not None and len(data) > 0:
                evalData.extend(data)

    evalData = cp.copy(evalData)
    mAP, AP, _ = metrics.computeHOImAP(evalData, generator.imagesMeta,
                                       obj_mapping, hoi_mapping, cfg)
    saveMeta = {'mAP': mAP, 'zAP': AP.tolist(), 'nb_empties': nb_empty}
    utils.save_dict(saveMeta, path + mode + '_mAP')
    print('mAP', mode, mAP)
    print('empties', nb_empty)
コード例 #4
0
def get_subreddit_entry_info_thread(sub_list):
    """Refresh the stored info entries for every subreddit in ``sub_list``.

    The entry list is loaded from ``subredditsPickle`` when that file exists.
    For each subreddit, any stored entry whose ``entry_name`` equals the
    lower-cased name is dropped, fresh info is fetched with
    ``get_subreddit_info`` and, when something was retrieved, appended and
    written back to ``subredditsPickle`` immediately.
    """
    import os
    from utils import load_dict, save_dict

    if os.path.exists(subredditsPickle):
        subreddits_dlist = load_dict(subredditsPickle)
    else:
        subreddits_dlist = []

    for subreddit in sub_list:
        # Drop any previously stored entry for this subreddit.
        remaining = []
        for entry in subreddits_dlist:
            if entry.get('entry_name') != subreddit.lower():
                remaining.append(entry)
        subreddits_dlist = remaining

        sub_info = get_subreddit_info(subreddit)
        log('    retrieved subreddit info ' + repr(sub_info))
        if not sub_info:
            continue

        # Persist after every successful retrieval.
        subreddits_dlist.append(sub_info)
        save_dict(subreddits_dlist, subredditsPickle)
コード例 #5
0
def get_subreddit_entry_info_thread(sub_list):
    """Refresh the cached info entries for each subreddit or domain entry.

    The module-level ``subreddits_dlist`` is loaded from ``subredditsPickle``
    if it is still empty and the file exists.  Each name in ``sub_list`` is
    lower-cased and stripped, its old entry is removed from the cache, and
    new info is fetched either as a domain icon or as subreddit info.  Every
    successful retrieval is appended and saved straight away.
    """
    from utils import load_dict, save_dict, get_domain_icon, setting_entry_is_domain

    global subreddits_dlist

    # Populate the shared cache from disk on first use.
    if not subreddits_dlist and os.path.exists(subredditsPickle):
        subreddits_dlist = load_dict(subredditsPickle)

    for raw_name in sub_list:
        subreddit = raw_name.lower().strip()

        # Discard the stale entry for this name, if there is one.
        kept_entries = []
        for entry in subreddits_dlist:
            if entry.get('entry_name', '') != subreddit:
                kept_entries.append(entry)
        subreddits_dlist = kept_entries

        domain = setting_entry_is_domain(subreddit)
        if not domain:
            log('  getting sub info '+subreddit)
            sub_info = get_subreddit_info(subreddit)
        else:
            log('  getting domain info '+domain)
            sub_info = get_domain_icon(subreddit, domain)

        log('    retrieved subreddit info ' + repr( sub_info ))
        if not sub_info:
            continue
        subreddits_dlist.append(sub_info)
        save_dict(subreddits_dlist, subredditsPickle)
コード例 #6
0
def prepare_data(video_dir, save_dir='datas', frame_gap=10, frame_size=224, frames_per_group=16):
    """Extract sampled, cropped frames from labelled videos and record group counts.

    ``video_dir`` is listed as one sub-directory per label, each of which is
    listed for video files.  Every ``frame_gap``-th frame of a video
    (starting with the first) is resized, cropped to its central half-width,
    resized again and written to ``save_dir/<label>/`` as
    ``video<v>_group<g>_index<i>.jpg``.  Finally a dictionary mapping
    ``save_dir/<label>/video<v>@<label index>`` to
    ``frame_count // (frame_gap * frames_per_group)`` is saved with
    ``save_dict``.

    Args:
        video_dir: Root directory with one sub-directory per label.
        save_dir: Root directory for the extracted frames.
        frame_gap: Keep one frame out of every ``frame_gap`` frames.
        frame_size: Side length (pixels) of the square output images.
        frames_per_group: Number of kept frames forming one group.
    """
    # "video-group" dictionary
    frame_info = dict()
    frames_per_full_group = frame_gap * frames_per_group
    # Central half-width crop; equals the original 56:168 for frame_size=224.
    crop_start = frame_size // 4
    crop_stop = crop_start + frame_size // 2

    for lidx, label in enumerate(os.listdir(video_dir)):
        # Create the per-label output directory when it is missing.
        save_class_dir = os.path.join(save_dir, label)
        if not os.path.exists(save_class_dir):
            os.makedirs(save_class_dir)
            print('create dirctory: {}'.format(save_class_dir))

        for vidx, video in enumerate(os.listdir(os.path.join(video_dir, label))):
            video_path = os.path.join(video_dir, label, video)
            cap = cv2.VideoCapture(video_path)
            frame_count = 0
            # Reset per video: a file that yields no frame used to leave
            # `group` unbound (first video) or stale (later videos).
            group = 0
            while cap.isOpened():
                _, frame = cap.read()
                if frame is None:
                    break
                frame_count += 1
                # Same frames as `frame_count % frame_gap == 1` for
                # frame_gap > 1, but also works for frame_gap == 1.
                if (frame_count - 1) % frame_gap == 0:
                    group = frame_count // frames_per_full_group
                    index = (frame_count // frame_gap) % frames_per_group
                    im = cv2.resize(frame, (frame_size, frame_size))
                    im = im[:, crop_start:crop_stop, :]
                    im = cv2.resize(im, (frame_size, frame_size))
                    cv2.imwrite(os.path.join(save_class_dir, 'video{}_group{}_index{}.jpg'.format(vidx, group, index)), im)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            # When everything is done, release the capture.
            cap.release()
            print('process video: {}, frames: {}, frame_gap: {}, groups: {}'.format(video_path, frame_count, frame_gap, group))
            frame_info[os.path.join(save_class_dir, 'video{}@{}'.format(vidx, lidx))] = frame_count // frames_per_full_group
    # Save the dictionary.
    save_dict('model/frame_info_{}.pkl'.format(frames_per_group), frame_info)
コード例 #7
0
def performance(parameter_name, values, config=CONFIG):
    """Run the GPU performance test once per value and plot the results.

    Each value is passed to ``run`` together with ``('gpu', True)``; the two
    returned numbers are stored as the result and the dropped-solution count
    of that run.  The config is saved to a freshly generated folder and a
    bar graph of the GPU results is plotted (the CPU series is fixed to 0
    because the CPU runs are disabled).
    """
    save_folder = generate_folder_name()
    print("\n\nPerformance test for parameter '{}' with values {}".format(
        parameter_name, values))

    shape = (len(values), 2)
    results = np.empty(shape, dtype=float)
    drop = np.empty(shape, dtype=float)

    # Column 0 holds the GPU numbers; column 1 (CPU) is left unfilled.
    for run_number, value in enumerate(values, start=1):
        row = run_number - 1
        print("Run {} with value {} on GPU".format(run_number, value))
        results[row, 0], drop[row, 0] = run((parameter_name, value),
                                            ('gpu', True))
        utils.clear_current_line()

    print("Number of dropped solutions: \n " + "GPU: " + format(drop[:, 0]))
    utils.save_dict(config, save_folder, 'config_' + parameter_name + '.txt')

    # NOTE(review): the plot goes to SAVE_DIR while the config goes to
    # save_folder - confirm this is intended.
    acoc_plotter.plot_bar_graph(
        tuple(results[:, 0]),
        0,
        tuple(values),
        save=True,
        show=True,
        save_folder=SAVE_DIR,
    )
コード例 #8
0
def performance(parameter_name, values, config=CONFIG):
    """Measure GPU performance for each value of one parameter and plot it.

    ``run`` is called once per value with the GPU enabled and is expected to
    return a pair that is stored as (result, dropped count).  The dropped
    counts are printed, the config is saved into a newly generated folder
    and the GPU results are drawn as a bar graph with a CPU series of 0.
    """
    save_folder = generate_folder_name()
    header = "\n\nPerformance test for parameter '{}' with values {}"
    print(header.format(parameter_name, values))

    results = np.empty((len(values), 2), dtype=float)
    drop = np.empty((len(values), 2), dtype=float)

    index = 0
    for v in values:
        print("Run {} with value {} on GPU".format(index+1, v))
        outcome = run((parameter_name, v), ('gpu', True))
        # Only the GPU column (0) is filled; CPU runs are disabled.
        results[index, 0], drop[index, 0] = outcome
        utils.clear_current_line()
        index += 1

    dropped_on_gpu = drop[:, 0]
    print("Number of dropped solutions: \n " + "GPU: " + format(dropped_on_gpu))

    gpu_results = tuple(results[:, 0])
    cpu_results = 0
    exp_name = tuple(values)

    config_file = 'config_' + parameter_name + '.txt'
    utils.save_dict(config, save_folder, config_file)

    acoc_plotter.plot_bar_graph(gpu_results, cpu_results, exp_name,
                                save=True, show=True, save_folder=SAVE_DIR)
コード例 #9
0
def main():
    """Train the text classifier and export its weights and SavedModel.

    Reads the run parameters from ``params()``, makes sure the output and
    pb directories exist, loads and shuffles the data, saves the tag-to-id
    mapping, splits the data 80/20 into train/dev sets, fits the model and
    finally writes the weights (``classification_model.h5``) and a
    TensorFlow SavedModel under ``args["pb_path"]``.
    """
    args = params()
    tag2id_path = os.path.join(args["output_path"], args["tag2id"])

    # Create both target directories up front.  The original tested
    # `os.path.join(args["pb_path"])`, which is always truthy, so the pb
    # directory was never created.
    os.makedirs(args["output_path"], exist_ok=True)
    os.makedirs(args["pb_path"], exist_ok=True)

    tag2id = {"体育": 0, "健康": 1, "军事": 2, "教育": 3, "汽车": 4}
    max_len = args["max_len"]
    batch_size = args["batch_size"]
    epoch = args["epoch"]
    # load data
    data, label = load_data(args["data_file"], tag2id)
    logger.info("total data size: {}".format(len(data)))
    logger.info("total label size: {}".format(len(label)))
    # shuffle the samples randomly
    data, label = random_shuffle(data, label)
    # save tag2id
    save_dict(tag2id, tag2id_path)
    # label encoder
    total_label = label_encoder(label, len(tag2id))

    # get train test data
    train_data, dev_data, train_label, dev_label = train_test_split(
        data, total_label, test_size=0.2)
    logger.info("train data size: {}".format(len(train_data)))
    logger.info("dev data size: {}".format(len(dev_data)))
    # bert tokenizer
    tokenizer = get_tokenizer()
    # prepare the model inputs
    train_x, train_y = create_inputs_targets(train_data, train_label, max_len,
                                             tokenizer)
    dev_x, dev_y = create_inputs_targets(dev_data, dev_label, max_len,
                                         tokenizer)

    # create model bert
    model = create_model(args["bert_model_name"], len(tag2id))
    model.fit(train_x,
              train_y,
              epochs=epoch,
              verbose=1,
              batch_size=batch_size,
              validation_data=(dev_x, dev_y),
              validation_batch_size=batch_size)

    # model save
    model_path = os.path.join(args["output_path"], "classification_model.h5")
    model.save_weights(model_path, overwrite=True)

    # save pb model
    tf.keras.models.save_model(model,
                               args["pb_path"],
                               save_format="tf",
                               overwrite=True)
コード例 #10
0
def collect_dispersion_from_earthsr_and_save(nside, options):
    """Collect per-mode dispersion data from the earthsr output and save it.

    The table ``options['global_folder'] + 'disp_vconly.input_code_earthsr'``
    is loaded with ``utils.load``.  Its first column is compared against the
    mode number; columns 1 to 4 are stored per mode under the keys
    ``'period'``, ``'cphi'``, ``'cg'`` and ``'QR'``.  Modes above 0 are padded
    up to the last period of mode 0, and ``'fks'`` (1 / period) is added to
    every non-empty mode.  The result is saved with ``utils.save_dict`` as
    ``PARAM_dispersion.mat`` in the global folder.

    Args:
        nside: Not used inside this function.
        options: Mapping read for ``'global_folder'`` and ``'nb_modes'``
            (the mode count is ``options['nb_modes'][1]``).

    Returns:
        The list of per-mode dictionaries (empty dict for modes without data).
    """

    data_dispersion_file_fund = utils.load(options['global_folder'] +
                                           'disp_vconly.input_code_earthsr')

    # One (initially empty) dict per mode.
    data_dispersion = [{} for i in range(0, options['nb_modes'][1])]
    list_modes_side = [{} for j in range(0, options['nb_modes'][1])]
    for nmode in range(0, options['nb_modes'][1]):

        # Row indices whose first column equals the current mode number.
        list_modes_side[nmode]['loc'] = np.where(
            data_dispersion_file_fund[:, 0] == nmode)

        # NOTE(review): freq_domain is assigned but never read in this function.
        freq_domain = 0
        if (list_modes_side[nmode]['loc'][0].size > 0):
            data_dispersion[nmode]['period'] = data_dispersion_file_fund[
                list_modes_side[nmode]['loc'][0], 1]
            data_dispersion[nmode]['cphi'] = data_dispersion_file_fund[
                list_modes_side[nmode]['loc'][0], 2]
            data_dispersion[nmode]['cg'] = data_dispersion_file_fund[
                list_modes_side[nmode]['loc'][0], 3]
            data_dispersion[nmode]['QR'] = data_dispersion_file_fund[
                list_modes_side[nmode]['loc'][0], 4]

        # Pad higher modes: while their last period is below the last period
        # of mode 0, append the next period of mode 0 and a filler value
        # (`x * 0. + np.inf`, i.e. inf for finite x) for cphi, cg and QR.
        # NOTE(review): this assumes the periods of this mode line up with the
        # leading periods of mode 0 (index cpt) - confirm against the input.
        if (nmode > 0 and list_modes_side[nmode]['loc'][0].size > 0):

            cpt = len(data_dispersion[nmode]['period']) - 1
            save_cphi = data_dispersion[nmode]['cphi'][-1] * 0. + np.inf
            save_cg = data_dispersion[nmode]['cg'][-1] * 0. + np.inf
            save_QR = data_dispersion[nmode]['QR'][-1] * 0. + np.inf
            while data_dispersion[nmode]['period'][-1] < data_dispersion[0][
                    'period'][-1]:
                cpt += 1
                data_dispersion[nmode]['period'] = np.concatenate([
                    data_dispersion[nmode]['period'],
                    [data_dispersion[0]['period'][cpt]]
                ])
                data_dispersion[nmode]['cphi'] = np.concatenate(
                    [data_dispersion[nmode]['cphi'], [save_cphi]])
                data_dispersion[nmode]['cg'] = np.concatenate(
                    [data_dispersion[nmode]['cg'], [save_cg]])
                data_dispersion[nmode]['QR'] = np.concatenate(
                    [data_dispersion[nmode]['QR'], [save_QR]])

    ## Save with name "current_struct" to be consistent with resonance_eigen
    # current_struct is the same list object as data_dispersion (no copy).
    current_struct = data_dispersion
    for nmode in range(0, len(current_struct)):
        if (len(current_struct[nmode]) > 0):
            current_struct[nmode]['fks'] = 1. / current_struct[nmode]['period']

    utils.save_dict(current_struct,
                    options['global_folder'] + 'PARAM_dispersion.mat')

    return current_struct
コード例 #11
0
def get_subreddit_entry_info_thread(sub_list):
    """Fetch and cache info for subreddit, domain and ``https://`` link entries.

    The module-level ``subreddits_dlist`` is loaded from ``subredditsPickle``
    when it is still empty and the file exists.  Entries starting with
    ``https://`` are resolved through ``ClassYoutube`` (channel info) or
    ``get_domain_icon``; any other entry is lower-cased and stripped, its old
    cached entry is removed, and it is resolved as a domain or a subreddit.
    Every non-empty result is appended to the cache and saved immediately.
    """
    from utils import load_dict, save_dict, get_domain_icon, setting_entry_is_domain
    from domains import ClassYoutube

    global subreddits_dlist

    # Populate the shared cache from disk on first use.
    if not subreddits_dlist and os.path.exists(subredditsPickle):
        subreddits_dlist = load_dict(subredditsPickle)

    for subreddit in sub_list:
        is_link_shortcut = subreddit.startswith('https://')
        if not is_link_shortcut:
            subreddit = subreddit.lower().strip()

            # Remove the old instance of this entry from the cache.
            fresh_entries = []
            for entry in subreddits_dlist:
                if entry.get('entry_name', '') != subreddit:
                    fresh_entries.append(entry)
            subreddits_dlist = fresh_entries

            domain = setting_entry_is_domain(subreddit)
            if not domain:
                log('  getting sub info ' + subreddit)
                sub_info = get_subreddit_info(subreddit)
            else:
                log('  getting domain info ' + domain)
                sub_info = get_domain_icon(subreddit, domain)
        else:
            # Link shortcut: drop the bracketed alias before resolving it.
            entry_in_file = subreddit
            without_alias = re.sub(r"[\(\[].*?[\)\]]", "", entry_in_file)
            yt = ClassYoutube(without_alias)
            url_type, id_ = yt.get_video_channel_user_or_playlist_id_from_url(
                without_alias)
            if url_type != 'channel':
                # Per the original note, only YouTube channels are supported
                # right now, so this branch is not expected to be used.
                log('  getting link info:entry_in_file=%s  without_alias=%s' %
                    (repr(entry_in_file), repr(without_alias)))
                sub_info = get_domain_icon(entry_in_file, None, without_alias)
            else:
                sub_info = yt.get_channel_info(id_, entry_name=entry_in_file)

        log('    retrieved subreddit info ' + repr(sub_info))
        if not sub_info:
            continue
        subreddits_dlist.append(sub_info)
        save_dict(subreddits_dlist, subredditsPickle)
コード例 #12
0
ファイル: dataset.py プロジェクト: softmicro929/py-cae
def prepare_data(im_size, data_dir, extra_data_dir=False):
    """Prepare the image data and save the name/emotion dictionaries.

    Creates ``data/images`` when missing, runs ``prepare_data_self`` on
    ``data_dir`` and, when ``extra_data_dir`` is truthy, also runs
    ``prepare_data_kdef`` on it.  The module-level ``name_dict`` and
    ``emos_dict`` are then saved under ``model/``.

    Args:
        im_size: Image size forwarded to the preparation helpers.
        data_dir: Directory of the project's own data.
        extra_data_dir: Directory of the extra (KDEF) data, or a falsy value
            to skip it.
    """
    # create directory if not exist
    if not os.path.exists('data/images'):
        os.makedirs('data/images')

    # prepare self data
    prepare_data_self(im_size, data_dir)
    # if use extra data, add kdef data into list
    if extra_data_dir:
        prepare_data_kdef(im_size, extra_data_dir, 70)

    # save name-label dictionary
    # Parenthesised single-argument print: same output on Python 2, and no
    # longer a SyntaxError on Python 3.
    print('Use extra data: {}, #persons: {}, #emotions: {}'.format(
        extra_data_dir, len(name_dict), len(emos_dict)))
    save_dict('model/name_dict.txt', name_dict)
    save_dict('model/emotion_dict.txt', emos_dict)
コード例 #13
0
def get_dataset_stats(limit):
    """Return mean, std and ZCA matrix of the first ``limit`` CIFAR10 training images.

    The statistics are loaded from a stats file named after ``limit`` when
    ``utils.load_dict`` finds it; otherwise they are computed, saved to that
    file and returned.

    Args:
        limit: Number of leading training samples to use.

    Returns:
        A tuple ``(mean, std, zca)``: the mean and standard deviation as
        lists (reduced over all axes but the last) and the ZCA matrix as a
        float torch tensor.
    """
    DATA_STATS_FILE = P.STATS_FOLDER + '/cifar10_' + str(limit) + '.pt'
    MEAN_KEY = 'mean'
    STD_KEY = 'std'
    ZCA_KEY = 'zca'

    # Load statistics
    stats = utils.load_dict(DATA_STATS_FILE)  # Try to load stats from file
    if stats is None:  # Stats file does not exist --> Compute statistics
        print("Computing statistics on dataset[0:" + str(limit) +
              "] (this might take a while)")

        # Load dataset
        cifar10 = CIFAR10(root=P.DATA_FOLDER, train=True,
                          download=True)  # Load CIFAR10 dataset
        # Leading samples, still in image layout (samples, H, W, channels);
        # slicing silently yields fewer rows when limit exceeds the dataset.
        X = cifar10.data[0:limit]

        # Normalize the data to [0 1] range
        X = X / 255.
        # Compute mean and st. dev. over all axes but the last and normalize
        # the data to zero mean and unit variance
        mean = X.mean(axis=(0, 1, 2), keepdims=True)
        std = X.std(axis=(0, 1, 2), keepdims=True)
        X = (X - mean) / std
        # Transpose image tensors dimensions in order to put channel dimension in pos. 1, as expected by pytorch
        X = X.transpose(0, 3, 1, 2)
        # Flatten every image to one row.  Use the actual sample count: the
        # original `reshape(limit, -1)` raised when limit exceeded the
        # number of available samples.
        X = X.reshape(X.shape[0], -1)
        # Compute ZCA matrix
        cov = np.cov(X, rowvar=False)
        U, S, _ = np.linalg.svd(cov)
        SMOOTHING_CONST = 1e-1
        zca = np.dot(U, np.dot(np.diag(1.0 / np.sqrt(S + SMOOTHING_CONST)),
                               U.T))

        # Save statistics
        stats = {
            MEAN_KEY: mean.squeeze().tolist(),
            STD_KEY: std.squeeze().tolist(),
            ZCA_KEY: torch.from_numpy(zca).float()
        }
        utils.save_dict(stats, DATA_STATS_FILE)
        print("Statistics computed and saved")

    return stats[MEAN_KEY], stats[STD_KEY], stats[ZCA_KEY]
コード例 #14
0
def get_subreddit_entry_info_thread(sub_list):
    """Resolve every entry of ``sub_list`` and store its info in the shared cache.

    ``subreddits_dlist`` (module level) is read from ``subredditsPickle`` if
    it is empty and the file exists.  ``https://`` entries have their
    bracketed alias stripped and are resolved via ``ClassYoutube`` or
    ``get_domain_icon``; all other entries are normalised (lower-case,
    stripped), de-duplicated in the cache and resolved as domain or
    subreddit.  Each non-empty result is appended and saved at once.
    """
    from utils import load_dict, save_dict, get_domain_icon, setting_entry_is_domain
    from domains import ClassYoutube

    global subreddits_dlist

    cache_is_empty = not subreddits_dlist
    if cache_is_empty and os.path.exists(subredditsPickle):
        subreddits_dlist = load_dict(subredditsPickle)

    for subreddit in sub_list:
        if subreddit.startswith('https://'):
            # Handle link shortcuts.
            entry_in_file = subreddit
            without_alias = re.sub(r"[\(\[].*?[\)\]]", "", entry_in_file)
            yt = ClassYoutube(without_alias)
            id_lookup = yt.get_video_channel_user_or_playlist_id_from_url(without_alias)
            url_type, id_ = id_lookup
            if url_type == 'channel':
                sub_info = yt.get_channel_info(id_, entry_name=entry_in_file)
            else:
                # Per the original note this part is not used: only YouTube
                # channels are supported right now.
                link_message = '  getting link info:entry_in_file=%s  without_alias=%s'
                log(link_message % (repr(entry_in_file), repr(without_alias)))
                sub_info = get_domain_icon(entry_in_file, None, without_alias)
        else:
            subreddit = subreddit.lower().strip()
            # Keep only the cached entries that belong to other names.
            subreddits_dlist = [
                cached for cached in subreddits_dlist
                if not cached.get('entry_name', '') == subreddit
            ]

            domain = setting_entry_is_domain(subreddit)
            if domain:
                log('  getting domain info '+domain)
                sub_info = get_domain_icon(subreddit, domain)
            else:
                log('  getting sub info '+subreddit)
                sub_info = get_subreddit_info(subreddit)

        log('    retrieved subreddit info ' + repr( sub_info ))
        if sub_info:
            subreddits_dlist.append(sub_info)
            save_dict(subreddits_dlist, subredditsPickle)
コード例 #15
0
ファイル: benchmark_gpu.py プロジェクト: torryt/polyaco-plus
def benchmark(parameter_name, values, config=CONFIG):
    """Benchmark one parameter on GPU and CPU and store config and results.

    For every value, ``run`` is executed first with the GPU enabled and then
    with it disabled; the two outcomes fill columns 0 and 1 of the result
    array.  The array is printed and written to ``results.txt`` and the
    config to ``config_<parameter_name>.txt`` in a newly generated folder.
    """
    save_folder = generate_folder_name()
    print("\n\nBenchmark for parameter '{}' with values {}".format(parameter_name, values))

    # (progress message, result column, gpu flag) for the two runs per value.
    device_runs = (
        ("Run {} with value {} on GPU", 0, True),
        ("Run {} with value {} on CPU", 1, False),
    )

    results = np.empty((len(values), 2), dtype=float)
    for index, v in enumerate(values):
        for message, column, use_gpu in device_runs:
            print(message.format(index+1, v))
            results[index, column] = run((parameter_name, v), ('gpu', use_gpu))
            utils.clear_current_line()

    result_str = "Results: \n{}".format(results)
    print(result_str)
    utils.save_dict(config, save_folder, 'config_' + parameter_name + '.txt')
    utils.save_string_to_file(result_str, save_folder, 'results.txt')
コード例 #16
0
def saveEvalResults(evalData, generator, cfg, obj_mapping):
    """Save the raw RPN evaluation data and its recall summary.

    ``evalData`` is saved as ``<mode>_res``; the values returned by
    ``metrics.computeRPNAR`` are saved as ``<mode>_mAP`` and ``R5`` / ``AR``
    are printed.

    Returns:
        The ``IoU`` value returned by ``metrics.computeRPNAR``.
    """
    mode = generator.data_type
    save_path = cfg.results_path + "rpn" + cfg.my_results_dir + '/'

    # NOTE(review): a missing directory leaves the path unchanged (strip '/'
    # and re-append it), an existing one gets a second '/' - confirm intent.
    save_path = (save_path if os.path.exists(save_path) else save_path[:-1]) + '/'

    file_prefix = save_path + mode
    utils.save_dict(evalData, file_prefix + '_res')

    AR, R5, IoU = metrics.computeRPNAR(evalData, generator.imagesMeta,
                                       obj_mapping, cfg)
    utils.save_dict({'AR': AR, 'R5': R5, 'IoU': IoU.tolist()},
                    file_prefix + '_mAP')
    print('R5', mode, R5)
    print('AR', mode, AR)
    return IoU
コード例 #17
0
def caculate_revenue():
    """Compute close-price ratios for up to 60 steps after each series date.

    For every code in the ``series_dict`` dictionary and each of its dates,
    the close price on that date is the baseline.  The date is then advanced
    one day per step (skipping up to 10 further days when no row exists) and
    the ratio ``close / baseline`` is appended to the list stored under the
    step number (as a string).  The resulting dictionary is saved as
    ``revenue_dict`` via ``utils.save_dict``.
    """
    series_dict = utils.get_dict('series_dict')
    revenue_days = 60
    revenue_dict = {}
    for key in series_dict:
        value_list = series_dict[key]
        code_df = utils.read(key)
        for date_string in value_list:
            # Parenthesised single-argument print: same output on Python 2
            # and valid syntax on Python 3.
            print(key)
            print(date_string)
            series_date = (datetime.datetime.strptime(date_string,
                                                      '%Y-%m-%d')).date()
            series_df = code_df[code_df['date'].isin([date_string])]
            # Take the matching row explicitly; float() on a one-element
            # Series is deprecated in recent pandas.
            # NOTE(review): assumes code_df is a pandas DataFrame - confirm.
            series_close = float(series_df['close'].iloc[0])
            revenue_date = series_date

            for days in range(revenue_days):
                revenue_date = revenue_date + datetime.timedelta(days=1)
                revenue_date_string = revenue_date.strftime('%Y-%m-%d')
                revenue_df = code_df[code_df['date'].isin(
                    [revenue_date_string])]

                if revenue_df.empty:
                    # No row for that day: look ahead up to 10 more days.
                    # The advanced date carries over to the following steps.
                    for _ in range(10):
                        revenue_date = revenue_date + datetime.timedelta(
                            days=1)
                        revenue_date_string = revenue_date.strftime('%Y-%m-%d')
                        revenue_df = code_df[code_df['date'].isin(
                            [revenue_date_string])]
                        if not revenue_df.empty:
                            break

                if revenue_df.empty:
                    continue

                revenue_close = float(revenue_df['close'].iloc[0])
                revenue = revenue_close / series_close
                revenue_dict.setdefault(str(days), []).append(revenue)
    utils.save_dict(revenue_dict, 'revenue_dict')
コード例 #18
0
def benchmark(parameter_name, values, config=CONFIG):
    """Time ``run`` on GPU and on CPU for each value and persist the outcome.

    Column 0 of the result array receives the GPU outcome and column 1 the
    CPU outcome of each value.  The formatted results are printed and saved
    as ``results.txt``; the config is saved as
    ``config_<parameter_name>.txt``.  Both go to a newly generated folder.
    """
    save_folder = generate_folder_name()
    banner = "\n\nBenchmark for parameter '{}' with values {}"
    print(banner.format(parameter_name, values))

    results = np.empty((len(values), 2), dtype=float)
    for run_number, value in enumerate(values, start=1):
        row = run_number - 1
        setting = (parameter_name, value)

        print("Run {} with value {} on GPU".format(run_number, value))
        results[row, 0] = run(setting, ('gpu', True))
        utils.clear_current_line()

        print("Run {} with value {} on CPU".format(run_number, value))
        results[row, 1] = run(setting, ('gpu', False))
        utils.clear_current_line()

    result_str = "Results: \n{}".format(results)
    print(result_str)

    config_file = 'config_' + parameter_name + '.txt'
    utils.save_dict(config, save_folder, config_file)
    utils.save_string_to_file(result_str, save_folder, 'results.txt')
コード例 #19
0
ファイル: det_test.py プロジェクト: aag147/rcnn
def saveEvalResults(generator, cfg):
    """Merge the per-image detection results of a split into one saved file.

    For every image ID in ``generator.imagesMeta`` the file
    ``<results>/det<dir>/res/<data_type>/<imageID>.pkl`` is looked up; the
    non-empty results are concatenated and saved as ``<data_type>_res`` with
    ``utils.save_dict``.  Images without a result file are counted and the
    count is printed.
    """
    my_output_path = cfg.results_path + 'det' + cfg.my_results_dir + '/res/' + generator.data_type + '/'

    evalData = []
    nb_empty = 0
    for imageID, _ in generator.imagesMeta.items():
        # Build the path once with str(): the original converted the ID for
        # the existence check but not for the load, which raised TypeError
        # for non-string IDs.
        result_stem = my_output_path + str(imageID)
        if not os.path.exists(result_stem + '.pkl'):
            nb_empty += 1
            continue

        data = utils.load_obj(result_stem)
        if data is not None and len(data) > 0:
            evalData.extend(data)

    path = cfg.results_path + "det" + cfg.my_results_dir + '/'
    mode = generator.data_type
    utils.save_dict(evalData, path + mode + '_res')
    print('nb_empty', nb_empty)
コード例 #20
0
def gen_p(name, start_year, end_year, minimum_mpg=15, minimum_g=30, verbose=False):
    """Save the players returned by ``get_player_names`` under ``name``.

    All arguments except ``name`` are forwarded unchanged to
    ``get_player_names``; its result is passed to ``utils.save_dict``.

    Args:
        name (str): Name under which the result is saved.
        start_year (int): Start of the data query.
        end_year (int): End of the data query.  For the 2009-2010 through
            2013-2014 seasons, ``start_year`` is 2010 and ``end_year`` 2014.
        minimum_mpg (int): Minimum minutes per game for a player to be
            included.
        minimum_g (int): Minimum number of games for a player to be included.
        verbose (bool): Forwarded to ``get_player_names``; per the original
            note, True prints when the query moves on to the next year.

    Returns:
        None
    """
    utils.save_dict(
        get_player_names(start_year, end_year, minimum_mpg, minimum_g, verbose),
        name,
    )
コード例 #21
0
def load_problems():
    """Fetch the metadata of all problems not yet cached and save the cache.

    Starting at id 1001, every id missing from the module-level
    ``problem_dict`` is fetched with ``utils.get_html`` and its level,
    category and title are stored.  The scan stops when the fetch returns
    ``'ERROR'`` or on Ctrl-C; the dictionary is saved in either case.
    """
    identifier = 1001
    utils.load_dict(problem_dict, 'problem_dict')
    while True:
        try:
            problem_id = str(identifier)
            if problem_id not in problem_dict:
                print(identifier)
                problem_html = utils.get_html("problem", problem_id=problem_id)
                if problem_html == 'ERROR':
                    break
                problem_dict[problem_id] = {
                    "problem_id": problem_id,
                    "level": utils.get_level_from_html(problem_html),
                    "category": utils.get_category_from_html(problem_html),
                    "title": utils.get_title_from_html(problem_html),
                }
            # Advance for cached ids too: the original only incremented
            # inside the branch above and looped forever on a cached id.
            identifier += 1
        except KeyboardInterrupt:
            break
    utils.save_dict(problem_dict, 'problem_dict')
コード例 #22
0
ファイル: feature_extraction.py プロジェクト: Ne88ie/STC
def get_docs_vocab(filenames, treshhold, stop_lemms, path_to_save_vocab=None, path_to_save_docs=None):
    """Fit a count vectorizer on the files and return the documents and vocabulary.

    The vocabulary is pruned in place by ``del_meaningless_words`` and, when
    ``path_to_save_vocab`` is given, saved with ``save_dict``.  The documents
    come from ``get_docs`` and are pickled to ``path_to_save_docs`` when that
    path is given.

    Returns:
        A tuple ``(docs, vocabulary)``.
    """
    # NOTE(review): the ASCII range `A-z` in the token pattern also matches
    # the characters [ \ ] ^ _ ` - confirm that this is intended.
    vectorizer_options = dict(
        input=u'filename',
        encoding=u'utf-8',
        lowercase=True,
        preprocessor=lemmer,
        tokenizer=None,
        token_pattern=u'(?u)[A-zА-я\-]{2,}',
        stop_words=stop_lemms,
        analyzer=u'word',
        max_df=treshhold,
        min_df=0.0,
        binary=False,
    )
    vectorizer = CountVectorizer(**vectorizer_options)
    vectorizer.fit(filenames)

    del_meaningless_words(vectorizer.vocabulary_)
    if path_to_save_vocab:
        save_dict(vectorizer.vocabulary_, path_to_save_vocab)

    docs = get_docs(vectorizer, filenames)
    if path_to_save_docs:
        with open(path_to_save_docs, 'wb') as docs_file:
            pickle.dump(docs, docs_file)

    return docs, vectorizer.vocabulary_
コード例 #23
0
def run_engine(config):
    """Parse and index every document in the corpus, then persist the results.

    Reads each file returned by ``ReadFile.read_folder``, parses every
    document, feeds the parsed documents to an ``Indexer``, flushes the
    remaining postings, merges the posting chunks and finally saves the
    documents dictionary, the inverted index and a small ``details``
    dictionary (document count and average document length) under
    ``config.get_out_path()``.

    :param config: configuration object; this function reads
        ``get__corpusPath()``, ``toStem`` and ``get_out_path()`` from it.
    :return: None
    """

    number_of_documents = 0
    sum_of_doc_lengths = 0

    r = ReadFile(corpus_path=config.get__corpusPath())
    p = Parse(config.toStem)
    indexer = Indexer(config, glove_dict)
    parquet_documents_list = r.read_folder(config.get__corpusPath())
    for parquet_file in parquet_documents_list:
        documents_list = r.read_file(file_name=parquet_file)
        # Iterate over every document in the file
        for document in documents_list:
            parsed_document = p.parse_doc(document)
            # Documents the parser rejects are neither counted nor indexed.
            if parsed_document is None:
                continue
            number_of_documents += 1
            sum_of_doc_lengths += parsed_document.doc_length
            indexer.add_new_doc(parsed_document)

    # Flush whatever is still buffered in the indexer after the last document.
    indexer.save_postings()
    if len(indexer.doc_posting_dict) > 0:
        indexer.save_doc_posting()
    utils.save_dict(indexer.document_dict, "documents_dict", config.get_out_path())
    if len(indexer.document_posting_covid) > 0:
        indexer.save_doc_covid()

    indexer.delete_dict_after_saving()

    # Merge the posting files written so far.
    indexer.merge_chunks()
    utils.save_dict(indexer.inverted_idx, "inverted_idx", config.get_out_path())

    # An empty corpus (or one where every document was rejected) must not crash
    # with ZeroDivisionError after all the work above; report an average of 0.
    if number_of_documents:
        avg_length_per_doc = sum_of_doc_lengths / number_of_documents
    else:
        avg_length_per_doc = 0

    dits = {'number_of_documents': number_of_documents, "avg_length_per_doc": avg_length_per_doc}

    utils.save_dict(dits, 'details', config.get_out_path())
コード例 #24
0
            clean_unpacked = clean_unpacked.squeeze().cpu().detach().numpy()
            lycon.save(args.result_dir + 'png/clean/' + filename[:-4] + '.png',
                       (clean_unpacked * 255).astype(np.uint8))

            noisy_packed = raw_noisy
            noisy_packed = noisy_packed[:, padh[j] // 2:-padh[j] // 2,
                                        padw[j] // 2:-padw[j] //
                                        2]  ## RGGB channels
            noisy_unpacked = utils.unpack_raw(noisy_packed.unsqueeze(
                0))  ## Rearrange RGGB channels into Bayer pattern
            noisy_unpacked = noisy_unpacked.squeeze().cpu().detach().numpy()
            lycon.save(args.result_dir + 'png/noisy/' + filename[:-4] + '.png',
                       (noisy_unpacked * 255).astype(np.uint8))

            variance_packed = variance[:, padh[j] // 2:-padh[j] // 2,
                                       padw[j] // 2:-padw[j] //
                                       2]  ## RGGB channels

            #import pdb;pdb.set_trace()
            dict_ = {}
            dict_['clean'] = clean_packed.cpu().detach().numpy(
            )  ## (4 x H/2 x W/2)
            dict_['noisy'] = noisy_packed.cpu().detach().numpy(
            )  ## (4 x H/2 x W/2)
            dict_['variance'] = variance_packed.cpu().detach().numpy(
            )  ## (4 x H/2 x W/2)
            dict_['shot_noise'] = shot_noise.cpu().detach().numpy()
            dict_['read_noise'] = read_noise.cpu().detach().numpy()
            utils.save_dict(dict_,
                            args.result_dir + 'pkl/' + filename[:-4] + '.pkl')
コード例 #25
0
    def get_best_model(self,
                       param_grid,
                       scoring='accuracy',
                       n_jobs=1,
                       verbose=1,
                       save=False):
        """
        Runs a 5-fold grid search on three random train/test splits of self.data and records
        train/test metrics for each trial in self.metric_dict

        Params:
        - param_grid: (dict or list/tuple of dicts)
            values to perform grid search over
        - scoring: str, optional (default='accuracy')
            specifies how to rank each estimator
        - n_jobs: int, optional (default='1')
            number of cores to run training on. -1 includes all cores
        - verbose: int, optional (default='1')
            specifies if output messages should be provided
        - save: bool, optional (default=False)
            flag for saving metric dict after every trial
        Returns:
        - clf: gridsearch object, as fitted on the last trial
        Raises:
        - ValueError: if param_grid is neither a dict nor a non-empty list/tuple of dicts
        """

        # Validate with explicit checks: assert statements are stripped under -O, and
        # indexing an empty list used to leak an IndexError instead of the ValueError.
        is_grid_sequence = (isinstance(param_grid, (list, tuple))
                            and len(param_grid) > 0
                            and all(isinstance(grid, dict) for grid in param_grid))
        if not (isinstance(param_grid, dict) or is_grid_sequence):
            raise ValueError('Unexpected data type passed in for param_grid')

        self.metric_dict = dict()
        clf = GridSearchCV(estimator=self.estimator,
                           param_grid=param_grid,
                           cv=5,
                           n_jobs=n_jobs,
                           verbose=verbose,
                           scoring=scoring)

        X, y = self.data[:,
                         1:], self.data[:, :1]  # Treats first column as label
        for i in range(3):  # Completes 3 trials
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, train_size=5000, shuffle=True)

            clf.fit(X_train, y_train.ravel())  # Fit training data to model

            # Gather training set metrics
            y_train_pred = clf.predict(X_train)
            acc_train = accuracy_score(y_train, y_train_pred)
            precision_train, recall_train, f1_train, _ = precision_recall_fscore_support(
                y_train, y_train_pred)

            # Gather testing set metrics
            y_test_pred = clf.predict(X_test)
            acc_test = accuracy_score(y_test, y_test_pred)
            precision_test, recall_test, f1_test, _ = precision_recall_fscore_support(
                y_test, y_test_pred)

            # Save metrics to dict for further analysis.
            # NOTE: 'model' refers to the single clf object that is re-fitted on every
            # trial, so all entries share the same (latest) search object.
            self.metric_dict[(self.data_name, i)] = {
                'acc_test': acc_test,
                'acc_train': acc_train,
                'precision_test': precision_test,
                'precision_train': precision_train,
                'recall_test': recall_test,
                'recall_train': recall_train,
                'f1_test': f1_test,
                'f1_train': f1_train,
                'model': clf,
                'cv_results': clf.cv_results_
            }
            if save:
                # Save checkpoint results in case of hardware failure
                loc_str = self.estimator.__class__.__name__  # clf type (eg SVC, LogisticRegression, etc)

                # Create the output directory (and any missing parents) if needed
                save_dir = os.path.join('..', 'checkpoints', loc_str)
                if not os.path.isdir(save_dir):
                    print(f'Creating {loc_str} directory now')
                    os.makedirs(save_dir, exist_ok=True)

                # The checkpoint is written on every trial; previously this was nested in
                # the branch above and therefore skipped whenever the directory existed.
                save_path = os.path.join(
                    save_dir, f'{loc_str}_{self.data_name}_{i}.pkl')
                save_dict(self.metric_dict, save_path)

        return clf
コード例 #26
0
                rels.append(prsBB)

        if rels:
            imagesMeta[imageID.split('.')[0]] = {
                'imageName': imageID,
                'objects': rels
            }


#    print(mlk)
    return imagesMeta

# Script entry point: load the bounding-box annotation file, extract the
# training meta data and store it next to the annotations.
if __name__ == "__main__":
    #    metaData = sio.loadmat(url + 'anno.mat', struct_as_record=False, squeeze_me=True)
    # Load the annotation .mat file located under the module-level `url` prefix.
    bbData = sio.loadmat(url + 'anno_bbox.mat',
                         struct_as_record=False,
                         squeeze_me=True)
    #    actions = bbData['list_action']
    #    trainYMatrix = metaData['anno_train']
    bbDataTrain = bbData['bbox_train']
    # Build the configuration and select the HICO dataset before resolving paths.
    cfg = basic_config()
    cfg = set_config(cfg)
    cfg.dataset = 'HICO'
    cfg.get_data_path()
    cfg.get_results_paths()
    labels = utils.load_dict(cfg.data_path + 'labels')
    print("Extract meta data")
    tmpTrainMeta = extractMetaData(bbDataTrain, labels)

    # Persist the extracted meta data as 'train_objs' under the same `url` prefix.
    utils.save_dict(tmpTrainMeta, url + 'train_objs')
コード例 #27
0
    data_set = data_manager.load_data_set(conf.data_set)
    X = data_set.data
    y = data_set.target
    class_indices = list(set(y))

    # Split data into training and testing set
    if conf.training_test_split:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    else:
        X_train = X_test = X
        y_train = y_test = y

    clf = acoc.PolyACO(X.shape[1], class_indices, save_folder=SAVE_FOLDER)
    clf.train(X_train, y_train, start_time=conf.start_time)
    predictions = clf.evaluate(X_test)
    return acoc.compute_score(predictions, y_test)


# Script entry point: store the classifier configuration, execute `runs`
# independent runs and write the individual and average scores to disk.
if __name__ == "__main__":
    utils.save_dict(CLASSIFIER_CONFIG, parent_folder=SAVE_FOLDER, file_name='config.json')
    scores = []
    runs = 1
    result_str = ''
    # The same start time is handed to every run.
    start_time = time()
    for i in range(runs):
        scores.append(run(start_time=start_time))
        print("\nRun {}/{} score: {:.4f}".format(i + 1, runs, scores[-1]))
    # First line: comma-separated scores; second line: their mean.
    # NOTE(review): the message says "{}-fold cross validation" but the value
    # inserted is the number of runs - confirm the wording is intended.
    result_str = ','.join([str(s) for s in scores]) + "\nAverage score with {}-fold cross validation: {:.5f}".format(runs, sum(scores) / runs)
    utils.save_string_to_file(result_str, parent_folder=SAVE_FOLDER, file_name='result.txt')
    print("\n" + result_str)
コード例 #28
0
    for j, c in enumerate(configurations):
        results[j].append(run(**c))

mean_results = np.array(results).mean(1).tolist()


def np_list_to_csv_string(npl):
    """Format every value of *npl* with four decimals and join them with commas."""
    formatted = ["{:.4f}".format(value) for value in npl]
    return ",".join(formatted)

# One CSV row per entry of mean_results.
# NOTE(review): the name `csv` would shadow the stdlib module of the same name
# if this file imports it - confirm it does not.
csv = []
for arr in mean_results:
    csv.append(np_list_to_csv_string(arr))

# Persist the raw results, their CSV rendering and the configuration used.
utils.save_object(mean_results, SAVE_FOLDER, 'results')
utils.save_string_to_file("\n".join(csv), SAVE_FOLDER, 'results.csv')
utils.save_dict(CLASSIFIER_CONFIG, SAVE_FOLDER, 'config.json')


# Plot one line per configuration; the x axis is the column index of `data`.
data = np.array(mean_results)
x = range(data.shape[1])
fig, ax = plt.subplots()

plotter.hide_top_and_right_axis(ax)
ax.yaxis.grid(color='gray')
ax.set_xlabel('Time (seconds)')
ax.set_ylabel('Best polygon solution')
# Fixed colour cycle so consecutive lines get distinct colours.
ax.set_prop_cycle(cycler('color', ['c', 'm', 'y', 'k', 'r', 'g', 'b']))

# Keep the plot handles returned for each configuration's line.
lines = []
for i in range(len(configurations)):
    lines.append(ax.plot(x, data[i], label=labels[i]))
コード例 #29
0
ファイル: feature_extraction.py プロジェクト: Ne88ie/STC
    filenames = sorted(os.path.join(path_to_dir, file) for file in os.listdir(path_to_dir) if file[-4:] == '.txt')
    path_to_save_vocab = '../data/vocabulary.txt'
    path_to_save_docs = '../data/docs'

    vectorizer = CountVectorizer(input=u'filename',
                                 encoding=u'utf-8',
                                 lowercase=True,
                                 preprocessor=lemmer, # None
                                 tokenizer=None,
                                 token_pattern=u'(?u)[A-zА-я\-]{2,}',
                                 stop_words=stop_lemms,
                                 analyzer=u'word',
                                 max_df=treshhold,
                                 min_df=0.0,
                                 binary=False,                 # True
                                 )



    vectorizer.fit(filenames)
    del_meaningless_words(vectorizer.vocabulary_)
    save_dict(vectorizer.vocabulary_, '../data/vocabulary.txt')

    transform = vectorizer.transform(filenames)
    with open('../data/transforms', 'wb') as f:
        pickle.dump(transform, f)

    docs = get_docs(vectorizer, filenames)
    with open('../data/docs', 'wb') as f:
        pickle.dump(docs, f)
コード例 #30
0
    cm_rnn = test_model(model, test_dataloader, device)

    acc = accuracy(cm_rnn)
    f1_score = f1_score_mean(cm_rnn)
    precision, recall = precision_recall_mean(cm_rnn)

    hyper_parameters = {
        'epochs': num_epochs,
        'batch_size': batch_size,
        'lr': learning_rate,
        'dropout': dropout
    }

    results = {
        'confusion_matrix': cm_rnn.tolist(),
        'accuracy': acc,
        'f1 score': f1_score,
        'precision': precision,
        'recall': recall
    }

    save_dict(results, f'experiments/{model_name}/results.txt')
    save_dict(hyper_parameters, f'experiments/{model_name}/hyper.txt')

    model_path = f'experiments/{model_name}/{model_name}.pt'
    torch.save(model.state_dict(), model_path)

    print('f1 score', f1_score)
    print(model_name, 'done')
コード例 #31
0
                raw_gt = raw_gt.squeeze(0).cpu().detach()
                tile_output[:, Y_lower // 2:Y_upper // 2,
                            X_lower // 2:X_upper //
                            2] = raw_gt[:, :size_Y // 2, :size_X // 2]

        #### Unpadding and saving
        print(f'output shape={tile_output.shape}')
        clean_packed = tile_output[:, :, :]  ## RGGB channels  (4 x H/2 x W/2)
        clean_unpacked = utils.unpack_raw(clean_packed.unsqueeze(
            0))  ## Rearrange RGGB channels into Bayer pattern
        clean_unpacked = clean_unpacked.squeeze().cpu().detach().numpy()

        try:
            print(os.path.join(args.result_dir, 'png', filename[:-4] + '.png'))
            # lycon.save(os.path.join(args.result_dir, 'png', filename[:-4]+'.png'),(clean_unpacked*255).astype(np.uint8))
            cv2.imwrite(
                os.path.join(args.result_dir, 'png', filename[:-4] + '.png'),
                (clean_unpacked * 255).astype(np.uint8))
            # cv2.imwrite(args.result_dir+'png/clean/'+filename[:-4]+'.png',(clean_unpacked*255).astype(np.uint8))
        except cv2.error as e:
            print(filename)
            print(clean_packed)
            #import pdb;pdb.set_trace()
        dict_ = {}
        dict_['raw'] = clean_packed.cpu().detach().numpy()  ## (4 x H/2 x W/2)
        utils.save_dict(
            dict_, os.path.join(args.result_dir, 'pkl',
                                filename[:-4] + '.pkl'))
        # gc.collect()
コード例 #32
0
ファイル: test_csvae.py プロジェクト: asmadotgh/dissect
def test(config,
         dbg_img_label_dict=None,
         dbg_mode=False,
         export_output=True,
         dbg_size=10,
         dbg_img_indices=None,
         calc_stability=True):
    """Run a trained CSVAE over a set of images and collect or export its outputs.

    The function rebuilds the CSVAE graph and the pre-trained classifier,
    restores both from their checkpoints and, for every input image, decodes
    one image per (latent dimension of W, bin) pair.

    Two modes exist, selected by ``config['calc_substitutability']``:

    * substitutability: the generated images are written to disk through
      ``save_batch_images``, the resulting label dictionary and image ids are
      saved, and the classifier is retrained via ``train_classif``.
    * regular testing: images and classifier predictions are gathered in
      numpy arrays, optionally saved as ``.npy`` files in the test directory
      and returned.

    Args:
        config: Dictionary of experiment settings (paths, model sizes,
            dataset name, ...), read by key throughout the function.
        dbg_img_label_dict: If not None, used as the image/label file instead
            of the one named in the configuration.
        dbg_mode: If True, ``dbg_size`` samples are processed instead of
            ``config['count_to_save']``. It is forced to True for the
            'shapes' dataset outside substitutability mode.
        export_output: If True (regular testing only), every collected array
            is saved as ``<name>.npy`` in the test directory.
        dbg_size: Number of samples to process when ``dbg_mode`` is True.
        dbg_img_indices: Optional collection of image ids to restrict the run
            to; ``None`` (default) or an empty collection means "no
            restriction".
        calc_stability: If True (regular testing only), every batch is also
            evaluated ``config['metrics_stability_nx']`` times with Gaussian
            noise added to the inputs.

    Returns:
        A dict mapping array names to numpy arrays in regular testing mode,
        an empty dict in substitutability mode.

    Raises:
        ValueError: If ``config['dataset']`` is not a supported dataset name.
    """
    # A mutable default would be shared between calls; normalise None instead.
    if dbg_img_indices is None:
        dbg_img_indices = []

    # ============= Experiment Folder=============
    assets_dir = os.path.join(config['log_dir'], config['name'])
    log_dir = os.path.join(assets_dir, 'log')
    ckpt_dir = os.path.join(assets_dir, 'ckpt_dir')
    sample_dir = os.path.join(assets_dir, 'sample')

    # Whether this is for saving the results for substitutability metric or the regular testing process.
    # If only for substitutability, we skip saving large arrays and additional multiple random outputs to avoid OOM
    calc_substitutability = config['calc_substitutability']

    if calc_substitutability:
        substitutability_attr = config['substitutability_attr']

        test_dir = os.path.join(assets_dir, 'test', 'substitutability_input')
        substitutability_exported_img_label_dict = os.path.join(
            test_dir, '{}_dims_{}_clss_{}.txt'.format(substitutability_attr,
                                                      config['w_dim'],
                                                      config['num_bins']))
        substitutability_label_scaler = config['num_bins'] - 1
        exported_dict = {}

        substitutability_classifier_config = config[
            'substitutability_classifier_config']
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # recent PyYAML releases; the context manager closes the file handle
        # the original left open.
        with open(config['classifier_config']) as _cls_config_file:
            _cls_config = yaml.safe_load(_cls_config_file)
        substitutability_img_subset = _cls_config['train']
        substitutability_img_label_dict = _cls_config['image_label_dict']
        # Derive the configuration used to retrain the classifier on the
        # images generated below.
        _edited_cls_config = deepcopy(_cls_config)
        _edited_cls_config['image_dir'] = os.path.join(test_dir, 'images')
        if not os.path.exists(_edited_cls_config['image_dir']):
            os.makedirs(_edited_cls_config['image_dir'])
        _edited_cls_config[
            'image_label_dict'] = substitutability_exported_img_label_dict
        _edited_cls_config['train'] = os.path.join(test_dir, 'train_ids.npy')
        _edited_cls_config['test'] = ''  # skips evaluating on test
        _edited_cls_config['log_dir'] = test_dir
        _edited_cls_config['ckpt_dir_continue'] = ''
        save_config_dict(_edited_cls_config,
                         substitutability_classifier_config)
    else:
        test_dir = os.path.join(assets_dir, 'test')

    # ============= Experiment Parameters =============

    ckpt_dir_cls = config['cls_experiment']
    if 'evaluation_batch_size' in config:
        BATCH_SIZE = config['evaluation_batch_size']
    else:
        BATCH_SIZE = config['batch_size']
    channels = config['num_channel']
    input_size = config['input_size']
    NUMS_CLASS_cls = config['num_class']
    NUMS_CLASS = config['num_bins']
    MU_CLUSTER = config['mu_cluster']
    VAR_CLUSTER = config['var_cluster']
    TRAVERSAL_N_SIGMA = config['traversal_n_sigma']
    # Bin k is mapped to the latent value OFFSET + k * STEP_SIZE, i.e. the bins
    # evenly span MU_CLUSTER +/- TRAVERSAL_N_SIGMA * VAR_CLUSTER.
    STEP_SIZE = 2 * TRAVERSAL_N_SIGMA * VAR_CLUSTER / (NUMS_CLASS - 1)
    OFFSET = MU_CLUSTER - TRAVERSAL_N_SIGMA * VAR_CLUSTER

    metrics_stability_nx = config['metrics_stability_nx']
    metrics_stability_var = config['metrics_stability_var']
    target_class = config['target_class']
    ckpt_dir_continue = ckpt_dir
    if dbg_img_label_dict is not None:
        image_label_dict = dbg_img_label_dict
    elif calc_substitutability:
        image_label_dict = substitutability_img_label_dict
    else:
        image_label_dict = config['image_label_dict']

    # CSVAE parameters
    beta1 = config['beta1']
    beta2 = config['beta2']
    beta3 = config['beta3']
    beta4 = config['beta4']
    beta5 = config['beta5']
    z_dim = config['z_dim']
    w_dim = config['w_dim']

    if dbg_mode:
        num_samples = dbg_size
    else:
        num_samples = config['count_to_save']

    dataset = config['dataset']

    if dataset == 'CelebA':
        my_data_loader = ImageLabelLoader(input_size=128)
        pretrained_classifier = celeba_classifier
        EncoderZ = EncoderZ_128
        EncoderW = EncoderW_128
        DecoderX = DecoderX_128
        DecoderY = DecoderY_128
    elif dataset == 'shapes':
        if calc_substitutability:
            my_data_loader = ShapesLoader()
        else:
            # for efficiency, let's just load as many samples as we need
            my_data_loader = ShapesLoader(
                dbg_mode=True,
                dbg_size=num_samples,
                dbg_image_label_dict=image_label_dict,
                dbg_img_indices=dbg_img_indices)
            dbg_mode = True
        pretrained_classifier = shapes_classifier
        EncoderZ = EncoderZ_64
        EncoderW = EncoderW_64
        DecoderX = DecoderX_64
        DecoderY = DecoderY_64
    elif dataset in ('CelebA64', 'dermatology', 'synthderm'):
        my_data_loader = ImageLabelLoader(input_size=64)
        pretrained_classifier = celeba_classifier
        EncoderZ = EncoderZ_64
        EncoderW = EncoderW_64
        DecoderX = DecoderX_64
        DecoderY = DecoderY_64
    else:
        # Fail here with a clear message instead of an UnboundLocalError on
        # first use of the loader or network classes further down.
        raise ValueError('Unsupported dataset: {}'.format(dataset))

    # ============= Data =============
    try:
        categories, file_names_dict = read_data_file(image_label_dict)
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still propagate.
        print("Problem in reading input data file : ", image_label_dict)
        sys.exit()
    if calc_substitutability:
        data = np.load(substitutability_img_subset)
        num_samples = len(data)
    elif dbg_mode and dataset == 'shapes':
        data = np.array([str(ind) for ind in my_data_loader.tmp_list])
    else:
        if len(dbg_img_indices) > 0:
            data = np.asarray(dbg_img_indices)
        else:
            data = np.asarray(list(file_names_dict.keys()))
    print("The classification categories are: ")
    print(categories)
    print('The size of the test set: ', data.shape[0])

    # ============= placeholder =============
    x_source = tf.placeholder(tf.float32,
                              [None, input_size, input_size, channels],
                              name='x_source')
    y_s = tf.placeholder(tf.int32, [None, NUMS_CLASS_cls], name='y_s')
    y_source = y_s[:, NUMS_CLASS_cls - 1]
    train_phase = tf.placeholder(tf.bool, name='train_phase')

    y_target = tf.placeholder(tf.int32, [None, w_dim],
                              name='y_target')  # between 0 and NUMS_CLASS

    generation_dim = w_dim

    # ============= CSVAE =============
    encoder_z = EncoderZ('encoder_z')
    encoder_w = EncoderW('encoder_w')
    decoder_x = DecoderX('decoder_x')
    decoder_y = DecoderY('decoder_y')

    # encode x to get mean, log variance, and samples from the latent subspace Z
    mu_z, logvar_z, z = encoder_z(x_source, z_dim)
    # encode x and y to get mean, log variance, and samples from the latent subspace W
    mu_w, logvar_w, w = encoder_w(x_source, y_source, w_dim)

    # pass samples of z and w to get predictions of x
    pred_x = decoder_x(tf.concat([w, z], axis=-1))
    # get predicted labels based only on the latent subspace Z
    pred_y = decoder_y(z, NUMS_CLASS_cls)

    # Create a single image based on y_target
    target_w = STEP_SIZE * tf.cast(y_target, dtype=tf.float32) + OFFSET
    fake_target_img = decoder_x(tf.concat([target_w, z], axis=-1))

    # ============= pre-trained classifier =============
    real_img_cls_logit_pretrained, real_img_cls_prediction = pretrained_classifier(
        x_source, NUMS_CLASS_cls, reuse=False, name='classifier')
    fake_recon_cls_logit_pretrained, fake_recon_cls_prediction = pretrained_classifier(
        pred_x, NUMS_CLASS_cls, reuse=True)
    fake_img_cls_logit_pretrained, fake_img_cls_prediction = pretrained_classifier(
        fake_target_img, NUMS_CLASS_cls, reuse=True)

    # ============= predicted probabilities =============
    fake_target_p_tensor = tf.reduce_max(tf.cast(y_target, tf.float32) * 1.0 /
                                         float(NUMS_CLASS - 1),
                                         axis=1)

    # ============= session =============
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()

    # ============= Checkpoints =============
    print(" [*] Reading checkpoint...")

    ckpt = tf.train.get_checkpoint_state(ckpt_dir_continue)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        saver.restore(sess, os.path.join(ckpt_dir_continue, ckpt_name))
        print(ckpt_dir_continue, ckpt_name)
        print("Successful checkpoint upload")
    else:
        print("Failed checkpoint load")
        sys.exit()

    # ============= load pre-trained classifier checkpoint =============
    # Only variables whose name contains 'classifier' are restored from the
    # classifier checkpoint.
    class_vars = [
        var for var in slim.get_variables_to_restore()
        if 'classifier' in var.name
    ]
    name_to_var_map_local = {var.op.name: var for var in class_vars}
    temp_saver = tf.train.Saver(var_list=name_to_var_map_local)
    ckpt = tf.train.get_checkpoint_state(ckpt_dir_cls)
    ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
    temp_saver.restore(sess, os.path.join(ckpt_dir_cls, ckpt_name))
    print("Classifier checkpoint loaded.................")
    print(ckpt_dir_cls, ckpt_name)

    # ============= Testing =============
    def _save_output_array(name, values):
        np.save(os.path.join(test_dir, '{}.npy'.format(name)), values)

    if not calc_substitutability:
        names = np.empty([num_samples], dtype=object)
        real_imgs = np.empty([num_samples, input_size, input_size, channels])
        fake_t_imgs = np.empty([
            num_samples, generation_dim, NUMS_CLASS, input_size, input_size,
            channels
        ])
        fake_s_recon_imgs = np.empty([
            num_samples, generation_dim, NUMS_CLASS, input_size, input_size,
            channels
        ])
        real_ps = np.empty(
            [num_samples, generation_dim, NUMS_CLASS, NUMS_CLASS_cls])
        recon_ps = np.empty(
            [num_samples, generation_dim, NUMS_CLASS, NUMS_CLASS_cls])
        fake_target_ps = np.empty([num_samples, generation_dim, NUMS_CLASS])
        fake_ps = np.empty(
            [num_samples, generation_dim, NUMS_CLASS, NUMS_CLASS_cls])

        # For stability metric
        stability_fake_t_imgs = np.empty([
            num_samples, metrics_stability_nx, generation_dim, NUMS_CLASS,
            input_size, input_size, channels
        ])
        stability_fake_s_recon_imgs = np.empty([
            num_samples, metrics_stability_nx, generation_dim, NUMS_CLASS,
            input_size, input_size, channels
        ])
        stability_recon_ps = np.empty([
            num_samples, metrics_stability_nx, generation_dim, NUMS_CLASS,
            NUMS_CLASS_cls
        ])
        stability_fake_ps = np.empty([
            num_samples, metrics_stability_nx, generation_dim, NUMS_CLASS,
            NUMS_CLASS_cls
        ])

        # Name -> array registry used for exporting and returning the results.
        # The arrays are only ever filled in place below, so the references
        # stay valid; this replaces looking the locals up through eval().
        arrs_to_save = {
            'names': names,
            'real_imgs': real_imgs,
            'fake_t_imgs': fake_t_imgs,
            'fake_s_recon_imgs': fake_s_recon_imgs,
            'real_ps': real_ps,
            'recon_ps': recon_ps,
            'fake_target_ps': fake_target_ps,
            'fake_ps': fake_ps,
            'stability_fake_t_imgs': stability_fake_t_imgs,
            'stability_fake_s_recon_imgs': stability_fake_s_recon_imgs,
            'stability_recon_ps': stability_recon_ps,
            'stability_fake_ps': stability_fake_ps,
        }

    np.random.shuffle(data)

    data = data[0:num_samples]
    for i in range(math.ceil(data.shape[0] / BATCH_SIZE)):
        image_paths = data[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        # num_seed_imgs is either BATCH_SIZE
        # or if the number of samples is not divisible by BATCH_SIZE a smaller value
        num_seed_imgs = np.shape(image_paths)[0]
        img, _labels = my_data_loader.load_images_and_labels(
            image_paths,
            config['image_dir'],
            1,
            file_names_dict,
            channels,
            do_center_crop=True)
        # Every image is repeated once per (latent dimension, bin) pair.
        img_repeat = np.repeat(img, NUMS_CLASS * generation_dim, 0)

        labels = np.repeat(_labels, NUMS_CLASS * generation_dim, 0)
        labels = labels.ravel()
        labels = np.eye(NUMS_CLASS_cls)[labels.astype(int)]

        # Target rows: for each latent dimension in turn, sweep that dimension
        # over all bins while the other dimensions stay at bin 0.
        _dim_bin_arr = np.zeros((generation_dim * NUMS_CLASS, generation_dim))
        for _gen_dim in range(generation_dim):
            _start = _gen_dim * NUMS_CLASS
            _end = (_gen_dim + 1) * NUMS_CLASS
            _dim_bin_arr_sub = np.zeros((NUMS_CLASS, generation_dim))
            _dim_bin_arr_sub[:, _gen_dim] = np.asarray(range(NUMS_CLASS))
            _dim_bin_arr[_start:_end, :] = _dim_bin_arr_sub
        target_labels = np.tile(
            _dim_bin_arr,
            (num_seed_imgs, 1))  # [num_seed_imgs * w_dim * NUMS_CLASS, w_dim]

        my_feed_dict = {
            y_target: target_labels,
            x_source: img_repeat,
            train_phase: False,
            y_s: labels
        }

        fake_t_img, fake_s_recon_img, real_p, recon_p, fake_target_p, fake_p = sess.run(
            [
                fake_target_img, pred_x, real_img_cls_prediction,
                fake_recon_cls_prediction, fake_target_p_tensor,
                fake_img_cls_prediction
            ],
            feed_dict=my_feed_dict)

        print('{} / {}'.format(i + 1, math.ceil(data.shape[0] / BATCH_SIZE)))

        _num_cur_samples = len(image_paths)

        if calc_substitutability:
            # One randomly chosen latent dimension per image is passed on to
            # save_batch_images.
            _ind_generation_dim = np.random.randint(low=0,
                                                    high=generation_dim,
                                                    size=_num_cur_samples)
            reshaped_imgs = np.reshape(
                fake_t_img, (_num_cur_samples, generation_dim, NUMS_CLASS,
                             input_size, input_size, channels))
            sub_exported_dict = save_batch_images(
                reshaped_imgs,
                image_paths,
                _ind_generation_dim,
                _labels,
                substitutability_label_scaler,
                _edited_cls_config['image_dir'],
                has_extension=(dataset != 'shapes'))
            exported_dict.update(sub_exported_dict)
        else:
            start_ind = i * BATCH_SIZE
            end_ind = start_ind + _num_cur_samples
            names[start_ind:end_ind] = np.asarray(image_paths)

            if calc_stability:
                for j in range(metrics_stability_nx):
                    # Perturb the inputs with Gaussian noise and rerun the model.
                    noisy_img = img + np.random.normal(
                        loc=0.0,
                        scale=metrics_stability_var,
                        size=np.shape(img))
                    stability_img_repeat = np.repeat(
                        noisy_img, NUMS_CLASS * generation_dim, 0)
                    stability_feed_dict = {
                        y_target: target_labels,
                        x_source: stability_img_repeat,
                        train_phase: False,
                        y_s: labels
                    }
                    _stability_fake_t_img, _stability_fake_s_recon_img, _stability_recon_p, _stability_fake_p = sess.run(
                        [
                            fake_target_img, pred_x, fake_recon_cls_prediction,
                            fake_img_cls_prediction
                        ],
                        feed_dict=stability_feed_dict)

                    stability_fake_t_imgs[start_ind:end_ind, j] = np.reshape(
                        _stability_fake_t_img,
                        (_num_cur_samples, generation_dim, NUMS_CLASS,
                         input_size, input_size, channels))
                    stability_fake_s_recon_imgs[
                        start_ind:end_ind, j] = np.reshape(
                            _stability_fake_s_recon_img,
                            (_num_cur_samples, generation_dim, NUMS_CLASS,
                             input_size, input_size, channels))
                    stability_recon_ps[start_ind:end_ind, j] = np.reshape(
                        _stability_recon_p, (_num_cur_samples, generation_dim,
                                             NUMS_CLASS, NUMS_CLASS_cls))
                    stability_fake_ps[start_ind:end_ind, j] = np.reshape(
                        _stability_fake_p, (_num_cur_samples, generation_dim,
                                            NUMS_CLASS, NUMS_CLASS_cls))

            real_imgs[start_ind:end_ind] = img
            fake_t_imgs[start_ind:end_ind] = np.reshape(
                fake_t_img, (_num_cur_samples, generation_dim, NUMS_CLASS,
                             input_size, input_size, channels))
            fake_s_recon_imgs[start_ind:end_ind] = np.reshape(
                fake_s_recon_img,
                (_num_cur_samples, generation_dim, NUMS_CLASS, input_size,
                 input_size, channels))
            real_ps[start_ind:end_ind] = np.reshape(
                real_p,
                (_num_cur_samples, generation_dim, NUMS_CLASS, NUMS_CLASS_cls))
            recon_ps[start_ind:end_ind] = np.reshape(
                recon_p,
                (_num_cur_samples, generation_dim, NUMS_CLASS, NUMS_CLASS_cls))
            fake_target_ps[start_ind:end_ind] = np.reshape(
                fake_target_p, (_num_cur_samples, generation_dim, NUMS_CLASS))
            fake_ps[start_ind:end_ind] = np.reshape(
                fake_p,
                (_num_cur_samples, generation_dim, NUMS_CLASS, NUMS_CLASS_cls))

    output_dict = {}
    if calc_substitutability:
        save_dict(exported_dict, substitutability_exported_img_label_dict,
                  substitutability_attr)
        np.save(_edited_cls_config['train'],
                np.asarray(list(exported_dict.keys())))

        # retrain the classifier with the new generated images
        tf.reset_default_graph()
        train_classif(config['substitutability_classifier_config'])
    else:
        if export_output:
            for arr_name, arr_values in arrs_to_save.items():
                _save_output_array(arr_name, arr_values)

        output_dict.update(arrs_to_save)

    return output_dict
コード例 #33
0
ファイル: extractHICOData.py プロジェクト: aag147/rcnn
# Script entry point: extract the HICO training meta data, merge similar
# bounding boxes and store both the raw and the merged version.
if __name__ == "__main__":
    #    metaData = sio.loadmat(url + 'anno.mat', struct_as_record=False, squeeze_me=True)
    # Load the annotation .mat file located under the module-level `url` prefix.
    bbData = sio.loadmat(url + 'anno_bbox.mat',
                         struct_as_record=False,
                         squeeze_me=True)
    #    actions = bbData['list_action']
    #    trainYMatrix = metaData['anno_train']
    bbDataTrain = bbData['bbox_train']
    # Build the configuration and select the HICO dataset before resolving paths.
    cfg = basic_config()
    cfg = set_config(cfg)
    cfg.dataset = 'HICO'
    cfg.get_data_path()
    cfg.get_results_paths()
    labels = utils.load_dict(cfg.data_path + 'labels')
    print("Extract meta data")
    tmpTrainMeta = extractMetaData(bbDataTrain)
    print("Combine similar BBs")
    # NOTE(review): 0.4 is presumably the similarity/overlap threshold used
    # for merging - confirm against combineSimilarBBs.
    newTrainMeta = combineSimilarBBs(tmpTrainMeta, labels, 0.4)
    # Sorted list of the merged meta data's keys (not used further in this block).
    newTrainMetaID = list(newTrainMeta.keys())
    newTrainMetaID.sort()
    #    imagesID = imagesID[6490:7000]
    #    images = pp.loadImages(imagesID, imagesMeta, url+"images/train2015/")
    #    [dataXP, dataXB, dataY, dataMeta] = pp.getData(imagesID, imagesMeta, images, (224,244))
    #    trainYMatrix = pp.getMatrixLabels(len(actions), dataY)
    # Persist the unmerged ('HICO_train_GT') and merged ('HICO_train_P') meta data.
    utils.save_dict(tmpTrainMeta, url + 'HICO_train_GT')
    utils.save_dict(newTrainMeta, url + 'HICO_train_P')
#    sampleMeta = imagesMeta[imagesID[0]]
#    i = 0
#    pdata.drawImages(imagesID[i*9:(i+1)*9], imagesMeta, url+'images/train2015/', False)
コード例 #34
0
def doccano2SA(doccano_file,
               save_ext_dir,
               save_cls_dir,
               splits=[0.8, 0.9],
               is_shuffle=True):
    """
        @Description: Convert a doccano export file into the ext data (per-character B-/I-/O tags)
            and cls data (label, aspect text, full text) files used by this application.
            Each non-blank line of the export must be a JSON object with a "data" text field and a
            "label" list of (start, end, tag) spans. Blank lines are ignored.
        @Param doccano_file: The annotated file exported from doccano labeling platform.
        @Param save_ext_dir: The directory of ext data that you wanna save (created if missing).
        @Param save_cls_dir: The directory of cls data that you wanna save (created if missing).
        @Param splits: Whether to split doccano file into train/dev/test, note: Only []/ len(splits)==2 accepted.
            With two values they are the cumulative fractions where train ends and dev ends, and must
            satisfy 0 < splits[0] < splits[1] < 1. The default list is only read, never mutated.
        @Param is_shuffle: Whether to shuffle data.
        @Raises ValueError: If doccano_file does not exist or splits is malformed.
    """
    if not os.path.exists(doccano_file):
        raise ValueError("Please input the correct path of doccano file.")

    # exist_ok=True replaces the racy "check exists, then create" sequence.
    os.makedirs(save_ext_dir, exist_ok=True)
    os.makedirs(save_cls_dir, exist_ok=True)

    if len(splits) != 0 and len(splits) != 2:
        raise ValueError("Only []/ len(splits)==2 accepted for splits.")

    # The chained comparison also rejects NaN thresholds, which would otherwise
    # slip through a series of individual >=/<= tests.
    if len(splits) == 2 and not 0 < splits[0] < splits[1] < 1:
        raise ValueError(
            "Please set correct splits, the element in it should be in (0,1), and splits[1]>splits[0]."
        )

    def label_ext_with_label_term(ext_label, start, end, tag):
        # Only the exact tag "Opinion" is an opinion span; every other tag
        # (e.g. "Pos-Aspect" / "Neg-Aspect") is tagged as an aspect.
        kind = "Opinion" if tag == "Opinion" else "Aspect"
        ext_label[start] = "B-" + kind
        for i in range(start + 1, end):
            ext_label[i] = "I-" + kind

    ext_examples, cls_examples = [], []
    with open(doccano_file, "r", encoding="utf-8") as f:
        # Drop blank lines (e.g. a trailing empty line) so json.loads does not
        # fail on them.
        raw_examples = [line for line in f if line.strip()]

    # start to label for ext and cls data
    for line in raw_examples:
        items = json.loads(line)
        text, label_terms = items["data"], items["label"]
        # label ext data with label_terms
        ext_label = ["O"] * len(text)
        aspect_mapper = {}
        for start, end, tag in label_terms:
            label_ext_with_label_term(ext_label, start, end, tag)
            if tag == "Pos-Aspect":
                aspect_mapper[text[start:end]] = "1"
            elif tag == "Neg-Aspect":
                aspect_mapper[text[start:end]] = "0"
        ext_examples.append((text, " ".join(ext_label)))
        # label cls data
        for ap in decoding(text, ext_label):
            aspect, opinions = ap[0], list(set(ap[1:]))
            # Aspects without a Pos-/Neg- annotation have no class label.
            if aspect not in aspect_mapper:
                continue
            aspect_text = concate_aspect_and_opinion(text, aspect, opinions)
            cls_examples.append((aspect_mapper[aspect], aspect_text, text))

    # index for saving data
    ext_idx = np.arange(len(ext_examples))
    cls_idx = np.arange(len(cls_examples))

    if is_shuffle:
        # ext is permuted before cls so a seeded RNG yields the same order as before.
        ext_idx = np.random.permutation(ext_idx)
        cls_idx = np.random.permutation(cls_idx)

    _save_doccano_examples(ext_examples, ext_idx, save_ext_dir, splits, "ext")
    _save_doccano_examples(cls_examples, cls_idx, save_cls_dir, splits, "cls")

    # save ext and cls dict
    ext_dict_path = os.path.join(save_ext_dir, "label.dict")
    cls_dict_path = os.path.join(save_cls_dir, "label.dict")
    save_dict(ext_dict_path, "ext")
    save_dict(cls_dict_path, "cls")
    print(f"\next: save dict to {ext_dict_path}.")
    print(f"cls: save dict to {cls_dict_path}.")


def _save_doccano_examples(examples, indices, save_dir, splits, name):
    """Write one example set ("ext" or "cls") to save_dir, whole or as train/dev/test.

    With empty ``splits`` everything goes to ``doccano.txt``; otherwise ``indices`` is
    cut at ``len(examples) * splits[0]`` and ``len(examples) * splits[1]`` into
    ``train.txt``, ``dev.txt`` and ``test.txt``. ``name`` only prefixes the log lines.
    """
    if len(splits) == 0:
        save_path = os.path.join(save_dir, "doccano.txt")
        save_examples(examples, save_path, indices)
        print(f"\n{name}: save data to {save_path}.")
        return

    th1 = int(len(examples) * splits[0])
    th2 = int(len(examples) * splits[1])
    train_path = os.path.join(save_dir, "train.txt")
    dev_path = os.path.join(save_dir, "dev.txt")
    test_path = os.path.join(save_dir, "test.txt")
    save_examples(examples, train_path, indices[:th1])
    save_examples(examples, dev_path, indices[th1:th2])
    save_examples(examples, test_path, indices[th2:])
    print(f"\n{name}: save train data to {train_path}.")
    print(f"{name}: save dev data to {dev_path}.")
    print(f"{name}: save test data to {test_path}.")
コード例 #35
0
ファイル: CompoundPred.py プロジェクト: YiwenZheng/Fersie
 def save_model(self, path_dir):
     """Save the model weights and the configuration under ``path_dir``.

     The directory (including missing parents) is created when needed. The
     model's ``state_dict()`` is written to ``<path_dir>/model.pt`` with
     ``torch.save``, then ``save_dict(path_dir, self.config)`` is called.
     NOTE(review): the file written by ``save_dict`` is decided by that helper,
     which is not visible here -- confirm.

     Args:
         path_dir: Target directory path as a string.
     """
     # exist_ok=True avoids the race between a separate exists() check and
     # the directory creation.
     os.makedirs(path_dir, exist_ok=True)
     torch.save(self.model.state_dict(), path_dir + '/model.pt')
     save_dict(path_dir, self.config)
コード例 #36
0
 def save_doc_posting(self):
     """Hand the current posting dictionary to ``utils.save_dict`` and start a new one.

     The second argument is "doc_posting" followed by the current counter
     value, the third is the config's output path. Afterwards the counter is
     incremented and ``self.doc_posting_dict`` is replaced by an empty dict.
     """
     posting_name = "doc_posting" + str(self.doc_posting_counter)
     out_path = self.config.get_out_path()
     utils.save_dict(self.doc_posting_dict, posting_name, out_path)

     # Reset the in-memory state for the next batch of postings.
     self.doc_posting_dict = {}
     self.doc_posting_counter += 1