Example #1
def plot_many(logs, metric='accuracy', measure="mean", info=None):
    logs_0 = load("collab_log_100_0_234.pkl")
    logs_2 = load("collab_log_100_2_108.pkl")
    logs_10 = load("collab_log_100_10_776.pkl")
    # get correct metrics
    _metric = metric
    metric, measure = verify_metrics(metric, measure)
    data_0 = np.mean([[v[metric] for v in lo] for lo in logs_0.values()],
                     axis=0)
    data_2 = np.mean([[v[metric] for v in lo] for lo in logs_2.values()],
                     axis=0)
    data_10 = np.mean([[v[metric] for v in lo] for lo in logs_10.values()],
                      axis=0)

    # plot data
    xlabel = 'Number of rounds'
    ylabel = 'Test Accuracy'
    title = f'{_metric.capitalize()} vs. No. of rounds'
    if info:
        xlabel = info.get('xlabel', xlabel)
        ylabel = info.get('ylabel', ylabel)
        title = info.get('title', title)
    x = range(0, len(data_0) * EVAL_ROUND, EVAL_ROUND)
    # , color=colors[i], label=mean[i][1], linestyle=line_styles[i]
    plt.plot(x, data_0, label="Skip local step")  # , '-x'
    plt.plot(x, data_2, label="2 local epochs")  # , '-x'
    plt.plot(x, data_10, label="10 local epochs")  # , '-x'
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # plt.title(title)
    plt.legend(loc="lower right", shadow=True)
    plt.show()
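The plotting examples here call a module-level load() helper that is not shown; a minimal sketch, assuming it simply unpickles the logged metrics (the real helper may differ):

import pickle

def load(path):
    # Deserialize and return a pickled object (e.g. a dict of training logs).
    with open(path, 'rb') as f:
        return pickle.load(f)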
Example #2
def word_day_occurrences(centrality_file_path=None,
                         centrality_measure='degree',
                         graph_path=PAJEK_FORMAT):

    if centrality_file_path is None:
        print("Trying to load graph from file path: {}".format(graph_path))
        try:
            graph = load(graph_path)
        except FileNotFoundError:
            print('File not found on specified path.')
            sys.exit(1)

        if centrality_measure == 'degree':
            words = degree_centrality(graph)
        elif centrality_measure == 'closeness':
            words = closeness_centrality(graph)
        elif centrality_measure == 'betweenness':
            words = betweenness_centrality(graph)
        else:
            print('Specified centrality measure not implemented.')
            sys.exit(1)

        word_dict = sorted(words.items(),
                           key=operator.itemgetter(1),
                           reverse=True)[:10]
        words = [item[0] for item in word_dict]
    else:
        centrality_data = pd.read_csv(centrality_file_path, sep='\t')
        words = list(centrality_data.iloc[:, 0])

    word_day_dict = get_word_attributes(graph_path)
    for word in words:
        print("Word: {},  Days of occurrence: {}\n".format(
            word, word_day_dict[word]))
Example #3
def plot_mean_var(k_type='triangular', sim=False, labels_names=None, label_ids=None):
    from collections import defaultdict, OrderedDict
    import itertools

    xlim = None  # [200, 300]
    figures_fol = op.join(root_fol, 'figures', 'smss_per_label{}{}'.format(
        '_window' if xlim is not None else '', '_sim' if sim else ''))
    utils.make_dir(figures_fol)
    gen, data = {}, {}
    hs_plot = [5, 10, 15, 25]
    if labels_names is None:
        labels_names = utils.load(op.join(root_fol, 'labels_names.pkl'))
        label_ids = range(len(labels_names))
    for fol, subject, sms, run in utils.sms_generator(root_fol):
        print(fol, subject, sms, run)
        if subject not in gen:
            gen[subject] = OrderedDict()
        if sms not in gen[subject]:
            gen[subject][sms] = []
        gen[subject][sms].append((fol, run))
        if not sim:
            d = np.load(op.join(fol, 'labels_data_{}_{}.npz'.format(atlas, measure)))
            data[fol] = d['data']
        else:
            import scipy.io as sio
            d_sim = sio.loadmat(op.join(fol, 'fmri_timecourse_sim.mat'))
            data[fol] = d_sim['timecourse_use_sim'].T

    # labels_names = [0]
    now = time.time()
    for subject, (label_id, label_name) in itertools.product(gen.keys(), zip(label_ids, labels_names)):
        utils.time_to_go(now, label_id, len(labels_names), 5)
        fig, axes = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(12, 8))
        axs = list(itertools.chain(*axes))
        # fig.suptitle('{} {} {}'.format(subject, label_name, 'sim' if sim else ''))
        for index, (ax, sms) in enumerate(zip(axs, gen[subject].keys())):
            fol, run = gen[subject][sms][0]
            tr = utils.load(op.join(fol, 'tr.pkl'))
            print(tr)
            ys = np.array([data[fol][label_id]])
            plot_vector_mean_var(subject, sms, run, ys, [label_name], [label_id], fol, tr, hs_plot, k_type, sim,
                                 overwrite=False, ax=ax, plot_legend=index==1, xlim=None)

        utils.maximize_figure(plt)
        plt.tight_layout()
        plt.savefig(op.join(figures_fol, label_name))
        plt.close()
Example #4
def get_dataset_iemocap(data_folder: str,
                        phase: str,
                        img_interval: int,
                        hand_crafted_features: Optional[bool] = False):
    main_folder = os.path.join(data_folder, 'IEMOCAP_RAW_PROCESSED')
    meta = load(os.path.join(main_folder, 'meta.pkl'))

    emoDict = getEmotionDict()
    uttr_ids = open(
        os.path.join(data_folder, 'IEMOCAP_SPLIT', f'{phase}_split.txt'),
        'r').read().splitlines()
    texts = [meta[uttr_id]['text'] for uttr_id in uttr_ids]
    labels = [emoDict[meta[uttr_id]['label']] for uttr_id in uttr_ids]

    if hand_crafted_features:
        text_features = load(
            os.path.join(data_folder, 'IEMOCAP_HCF_FEATURES',
                         f'{phase}_text_features.pt'))
        audio_features = load(
            os.path.join(data_folder, 'IEMOCAP_HCF_FEATURES',
                         f'{phase}_audio_features.pt'))
        video_features = load(
            os.path.join(data_folder, 'IEMOCAP_HCF_FEATURES',
                         f'{phase}_video_features.pt'))

        # Select only the FAUs
        for uttrId in video_features.keys():
            for imgId in video_features[uttrId].keys():
                video_features[uttrId][imgId] = video_features[uttrId][imgId][
                    -35:]

        this_dataset = IEMOCAP_baseline(utterance_ids=uttr_ids,
                                        texts=text_features,
                                        video_features=video_features,
                                        audio_features=audio_features,
                                        labels=labels,
                                        label_annotations=list(emoDict.keys()),
                                        img_interval=img_interval)
    else:
        this_dataset = IEMOCAP(main_folder=main_folder,
                               utterance_ids=uttr_ids,
                               texts=texts,
                               labels=labels,
                               label_annotations=list(emoDict.keys()),
                               img_interval=img_interval)

    return this_dataset
Example #5
def load_cfg() -> Tuple[Config, str]:
    from src.dict2obj import Config
    from src.base import Adversary
    from src.utils import gpu, load, set_seed

    cfg = Config()
    set_seed(opts.seed)

    # load the model
    model = load_model(opts.model)(num_classes=get_num_classes(opts.dataset))
    device = gpu(model)
    load(model=model, path=opts.info_path, device=device)

    # load the testset
    testset = load_dataset(dataset_type=opts.dataset,
                           transform=opts.transform,
                           train=False)
    cfg['testloader'] = load_dataloader(dataset=testset,
                                        batch_size=opts.batch_size,
                                        train=False,
                                        show_progress=opts.progress)
    normalizer = load_normalizer(dataset_type=opts.dataset)

    # generate the log path
    _, log_path = generate_path(method=METHOD,
                                dataset_type=opts.dataset,
                                model=opts.model,
                                description=opts.description)

    # set the attacker
    attack, bounds, preprocessing = load_attacks(attack_type=opts.attack,
                                                 dataset_type=opts.dataset,
                                                 stepsize=opts.stepsize,
                                                 steps=opts.steps)

    epsilons = torch.linspace(opts.epsilon_min, opts.epsilon_max,
                              opts.epsilon_times).tolist()
    cfg['attacker'] = Adversary(model=model,
                                attacker=attack,
                                device=device,
                                bounds=bounds,
                                preprocessing=preprocessing,
                                epsilon=epsilons)

    return cfg, log_path
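In this example load() restores model weights from a checkpoint; a minimal sketch, assuming it wraps torch.load plus load_state_dict (the default filename 'paras.pt' is purely illustrative):

import os
import torch

def load(model, path, device, filename='paras.pt'):
    # Map the saved parameters onto the target device and copy them into the model.
    state_dict = torch.load(os.path.join(path, filename), map_location=device)
    model.load_state_dict(state_dict)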
Example #6
def plot_train_history(logs,
                       metric='accuracy',
                       measure="mean",
                       info=None,
                       plot_peer=None):
    if isinstance(logs, str):
        logs = load(logs)
    # get correct metrics
    _metric = metric
    metric, measure = verify_metrics(metric, measure)
    # prepare data
    std_data = None
    if measure == "mean":
        data = np.mean([[v[metric] for v in lo] for lo in logs.values()],
                       axis=0)
    elif measure == "mean-std":
        if plot_peer is None:
            data = np.mean([[v[metric] for v in lo] for lo in logs.values()],
                           axis=0)
        else:
            print(f">>>>> Plotting chart for Peer({plot_peer})...")
            data = [v[metric] for v in logs[plot_peer]]
        std_data = np.std([[v[metric] for v in lo] for lo in logs.values()],
                          axis=0)
    elif measure == "max":
        data = np.max([[v[metric] for v in lo] for lo in logs.values()],
                      axis=0)
    else:
        data = np.std([[v[metric] for v in lo] for lo in logs.values()],
                      axis=0)
    # plot data
    xlabel = 'Rounds'
    ylabel = f' {measure.capitalize()} {_metric.capitalize()}'
    title = f'{_metric.capitalize()} vs. No. of rounds'
    if info:
        xlabel = info.get('xlabel', xlabel)
        ylabel = info.get('ylabel', ylabel)
    x = range(0, len(data) * EVAL_ROUND, EVAL_ROUND)
    # Configs
    plt.grid(linestyle='dashed')
    plt.xlabel(xlabel, fontsize=13)
    plt.xticks(fontsize=13, )
    plt.ylabel(ylabel, fontsize=13)
    plt.yticks(fontsize=13, )
    # Plot
    plt.plot(x, data)
    if std_data is not None:
        plt.fill_between(x, data - std_data, data + std_data, alpha=.1)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # plt.title(title)

    plt.show()
Example #7
def load_inception(
        resize: bool = True,
        normalizer: Optional[Callable] = None
) -> Tuple[nn.Module, torch.device]:

    file_ = os.path.join(INFO_PATH, INCEPTION_V3)
    if not os.path.exists(file_):
        print(">>> Inception model is not found. Download from url ...")
        inception_model = inception_v3(pretrained=True, transform_input=False)
        device = gpu(inception_model)
        torch.save(inception_model.state_dict(), file_)
    else:
        print(f">>> Inception model is found: {file_} ...")
        inception_model = Inception3(transform_input=False)
        device = gpu(inception_model)
        load(model=inception_model,
             path=INFO_PATH,
             filename=INCEPTION_V3,
             device=device)
    model = _Net(arch=inception_model, resize=resize, normalizer=normalizer)
    return model, device
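These model-loading examples also call a gpu() helper; a minimal sketch, assuming it simply moves the model to CUDA when available and returns the chosen device:

import torch

def gpu(model):
    # Prefer CUDA if available, otherwise fall back to CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    return device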
Example #8
def load_cfg() -> Tuple[Config, str]:
    from src.dict2obj import Config
    from src.utils import gpu, load, set_seed

    cfg = Config()
    set_seed(opts.seed)

    # load the model
    model = load_model(opts.model)(num_classes=get_num_classes(opts.dataset))
    device = gpu(model)
    load(model=model, path=opts.info_path, device=device)
    model.eval()

    # load the testset
    testset = load_dataset(dataset_type=opts.dataset,
                           transform='None',
                           train=False)
    data = []
    targets = []
    for i in range(len(testset)):
        img, label = testset[i]
        data.append(img)
        targets.append(label)

    cfg['data'] = torch.stack(data)
    cfg['targets'] = torch.tensor(targets, dtype=torch.long)
    normalizer = load_normalizer(opts.dataset)

    # generate the log path
    _, log_path = generate_path(METHOD, opts.dataset, opts.model,
                                opts.description)

    cfg['attacker'] = AutoAttack(Defense(model, normalizer),
                                 norm=opts.norm,
                                 eps=opts.epsilon,
                                 version=opts.version,
                                 device=device)

    return cfg, log_path
Example #9
def get_dataset_mosei(data_folder: str,
                      phase: str,
                      img_interval: int,
                      hand_crafted_features: Optional[bool] = False):
    main_folder = os.path.join(data_folder, 'MOSEI_RAW_PROCESSED')
    meta = load(os.path.join(main_folder, 'meta.pkl'))

    ids = open(os.path.join(data_folder, 'MOSEI_SPLIT', f'{phase}_split.txt'),
               'r').read().splitlines()
    texts = [meta[id]['text'] for id in ids]
    labels = [meta[id]['label'] for id in ids]

    if hand_crafted_features:
        hcf = load(
            os.path.join(data_folder, 'MOSEI_HCF_FEATURES',
                         f'mosei_senti_hcf_{phase}.pkl'))
        return MOSEI_baseline(ids=ids, hcf=hcf, labels=labels)

    return MOSEI(main_folder=main_folder,
                 ids=ids,
                 texts=texts,
                 labels=labels,
                 img_interval=img_interval)
Example #10
def plot_mean_var(y, hs, t_axis, fol, k_type='triangular'):
    if not op.isfile(op.join(fol, 'ests_mean_{}.pkl'.format(k_type))):
        ests_mean, ests_var = {}, {}
        for h in hs:
            ests_mean[h] = est_mean(y, h, t_axis, k_type)
            ests_var[h] = est_var(y, ests_mean[h], h, t_axis, k_type)
        utils.save(ests_mean, op.join(fol, 'ests_mean_{}.pkl'.format(k_type)))
        utils.save(ests_var, op.join(fol, 'ests_var_{}.pkl'.format(k_type)))
    else:
        ests_mean = utils.load(op.join(fol, 'ests_mean_{}.pkl'.format(k_type)))
        ests_var = utils.load(op.join(fol, 'ests_var_{}.pkl'.format(k_type)))

    boynton_colors = ["red", "green", "yellow", "magenta", "pink", "orange", "brown", "gray"]
    t = range(len(y))
    plt.figure()
    plt.plot(t, y, 'b', label='real')
    for h, color in zip(hs, boynton_colors):
        # plt.errorbar(t, ests_mean[h], c=color, yerr=np.power(ests_var[h], 0.5), label='w {}'.format(h))
        plt.plot(t, ests_mean[h], color, label='w {}'.format(h))
        error = np.power(ests_var[h], 0.5)
        plt.fill_between(t, ests_mean[h] - error, ests_mean[h] + error,
                        alpha=0.2, edgecolor=color, facecolor=color)
    plt.legend()
    plt.savefig(op.join(fol, 'first_vertice_h_var_k_{}.jpg'.format(k_type)))
Example #11
def run(config_name, config_folder, session_name, multi_session, pe, on_load):
    lowpriority()
    if session_name is None:
        session_name = config_name if multi_session <= 1 else f"{config_name}-x{multi_session}"
        session_name = session_name.replace(".ini", "")

    # Load config
    config = ConfigParser()
    read_ok = config.read(f"{config_folder}/{config_name}")
    assert len(read_ok) > 0, f"Failed to read config file: {Path(config_folder) / config_name}"

    # Define modules here. NOTE: no longer needed; module and path can now be specified directly in the config file
    if "Controller" in config and "module" in config["Controller"]:
        module = config["Controller"]["module"]
        if module == "CopyNTM":
            model_builder = lambda: CopyNTM(**dict([(key, int(value)) for key, value in config["NTM"].items()]))
        elif module == "TMazeNTMModule":
            model_builder = lambda: TMazeNTMModule(**dict([(k, int(v)) for k, v in config["ModelParameters"].items()]))
        else:
            raise AssertionError(f"Unknown module specification: {module}")
    else:
        model_builder = None

    # Set seed
    seed = config["HyperParameters"].get("seed") if "HyperParameters" in config else None
    if seed:
        seed = int(seed)
        torch.manual_seed(seed)
        numpy.random.seed(seed)
        random.seed(seed)

    # Create worker(s) and session
    workers = []
    for i in range(max(1, multi_session)):
        workers.append(GA(f"{config_folder}/{config_name}", model_builder=model_builder))
    if multi_session > 1:
        session = MultiSession(workers, session_name, parallel_execution=pe)
    else:
        session = Session(workers[0], session_name)

    # Copy config file to session folder
    if not os.path.isfile(Path(session.save_folder) / "config"):
        with open(Path(session.save_folder) / "config", "a") as fp:
            config.write(fp)
    if on_load:
        on_load = load(on_load)
    session.start(on_load=on_load)
Example #12
def run(dataset, seed, ntrees, n_folds, n_repeats, dataset_results_path):
    start = int(time())

    random_state = np.random.RandomState(seed)
    numerical_attributes, attr_names, attr_values, x, y = utils.load(dataset)
    random_forest = RandomForest(ntrees, random_state, numerical_attributes,
                                 attr_values)

    results = []
    for i in range(n_repeats):
        start2 = int(time())
        print("\t\t#{} {}-fold CV iteration".format(i, n_folds))
        results += utils.cross_validate(random_forest, x, y, n_folds,
                                        random_state)
        print("\t\t" + readable_time(start2, int(time())))

    stop = int(time())
    print("\t\tSaving results...", end=" ")

    results_file_path = os.path.join(dataset_results_path,
                                     '{}.csv'.format(ntrees))
    with open(results_file_path, 'w') as file:
        file.write('f1_score\n')

        for f1_score in results:
            file.write("{}\n".format(f1_score))

    time_file_path = os.path.join(dataset_results_path, 'summary.csv')
    with open(time_file_path, 'a') as file:
        file.write('{},{},{},{},{},{},{}\n'.format(dataset, np.mean(results),
                                                   np.std(results), ntrees,
                                                   n_folds, n_repeats,
                                                   stop - start))

    # for i in range(2):
    #     dot = graphviz.Digraph(name=str(i))
    #     random_forest.trees[i].get_graph(dot, attr_names=attr_names)
    #     dot.render(cleanup=True)

    print("Done")
    print("\t\tTime elapsed: {}".format(readable_time(start, stop)))
Example #13
    def __init__(self, x=np.array([]), Fs=1.0, t0=0.0, path=None):

        if path is not None:
            # signal should be imported
            # from a WAV file
            x, Fs = load(path)
        else:
            # signal should be taken
            # from the 'x' argument
            x = x.astype(np.float64)

        self.x = x
        self.Fs = Fs
        self.L = np.size(self.x)
        self.t = np.arange(self.L) / self.Fs + t0

        self.PLOTS = {
            "time": self._plot_time,
            "frequency": self._plot_freq,
            "spectrogram": self._plot_spec,
            "cepstrogram": self._plot_cepstrogram,
            "lpctrogram": self._plot_lpctrogram
        }
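Here load(path) is expected to return the samples and sampling rate of a WAV file; a minimal sketch using scipy.io.wavfile (an assumption, the project's own reader may differ):

import numpy as np
from scipy.io import wavfile

def load(path):
    # scipy returns (rate, data); reorder to the (x, Fs) pair the constructor expects.
    Fs, x = wavfile.read(path)
    return x.astype(np.float64), Fs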
Example #14
from src.dtree import DecisionTree
from src.utils import load
import graphviz

numerical_attributes, attribute_names, attribute_values, x, y = load(
    'benchmark')

dtree = DecisionTree(None,
                     numerical_attributes,
                     attribute_values,
                     benchmark=True)
dtree.fit(x, y)

dot = graphviz.Digraph(name='tests/results/benchmark_tree')
dtree.get_graph(dot, attr_names=attribute_names)
dot.render(cleanup=True)
Example #15
    top_hs = 120

    # hs_plot = [5, 10, 15, 25]
    # hs = [8,  13,  18, 23]
    k_types = ['triangular'] # 'Epanechnikov', 'tricube'
    only_one_trace = False
    # legend_index = 1
    # labels_names = ['posteriorcingulate-lh']
    # figures_fol = op.join(root_fol, 'figures', 'smss_per_label_window')
    # utils.make_dir(figures_fol)
    overwrite = False
    sim = False
    n_jobs = -1
    n_jobs = utils.get_n_jobs(n_jobs)
    specific_label = 'posteriorcingulate-lh'
    labels_names = utils.load(op.join(root_fol, 'labels_names.pkl'))
    labels_ids = range(len(labels_names))

    specific_label = 'posteriorcingulate-lh'
    # for sim in [False, True]:
    subjects = set()
    for fol, subject, sms, run in utils.sms_generator(root_fol):
        if subject == 'nmr00956':
            continue
        subjects.add(subject)
        fmri_fname = op.join(fol, 'fmcpr.sm5.{}.{}.mgz'.format(fsaverage, hemi))
        tr = utils.load(op.join(fol, 'tr.pkl'))
        print(subject, sms, run, tr)
        # main(subject, sms, run, fmri_fname, fol, root_fol, atlas, tr, hs, k_types, measure, sim, labels_names,
        #      labels_ids, only_one_trace, overwrite, specific_label, n_jobs=n_jobs)
Example #16
def get_data(args, phase):
    dataset = args['dataset']
    seq_len = args['data_seq_len']
    file_folder = args['data_folder']
    aligned = args['aligned']

    zsl = args['zsl']
    fsl = args['fsl'] if phase == 'train' else -1

    processed_path = f'./processed_datasets/{dataset}_{seq_len}_{phase}{"" if aligned else "_noalign"}.pt'
    if os.path.exists(processed_path) and zsl == -1 and fsl == -1:
        print(f'Load processed dataset! - {phase}')
        return load(processed_path)

    if dataset == 'mosi':
        if seq_len == 20:
            data_path = os.path.join(file_folder, f'X_{phase}.h5')
            label_path = os.path.join(file_folder, f'y_{phase}.h5')
            data = np.array(h5py.File(data_path, 'r')['data'])
            labels = np.array(
                h5py.File(label_path.replace('X', 'y'), 'r')['data'])
            text = data[:, :, :300]
            audio = data[:, :, 300:305]
            vision = data[:, :, 305:]
            this_dataset = MOSI(list(range(len(labels))),
                                text,
                                audio,
                                vision,
                                labels,
                                is20=True)
        else:
            data_path = os.path.join(
                file_folder, f'mosi_data{"" if aligned else "_noalign"}.pkl')
            data = load(data_path)
            data = data[phase]
            this_dataset = MOSI(data['id'], data['text'], data['audio'],
                                data['vision'], data['labels'])
    # elif dataset == 'mosei_senti':
    #     if seq_len == 20:
    #         text_data = np.array(h5py.File(os.path.join(file_folder, f'text_{phase}.h5'), 'r')['d1'])
    #         audio_data = np.array(h5py.File(os.path.join(file_folder, f'audio_{phase}.h5'), 'r')['d1'])
    #         vision_data = np.array(h5py.File(os.path.join(file_folder, f'vision_{phase}.h5'), 'r')['d1'])
    #         labels = np.array(h5py.File(os.path.join(file_folder, f'y_{phase}.h5'), 'r')['d1'])
    #         this_dataset = MOSEI(list(range(len(labels))), text_data, audio_data, vision_data, labels)
    #     else:
    #         data_path = os.path.join(file_folder, f'mosei_senti_data{"" if aligned else "_noalign"}.pkl')
    #         data = load(data_path)
    #         data = data[phase]
    #         this_dataset = MOSEI(data['id'], data['text'], data['audio'], data['vision'], data['labels'])
    elif dataset == 'mosei_emo':
        text_data = np.array(
            h5py.File(os.path.join(file_folder, f'text_{phase}_emb.h5'),
                      'r')['d1'])
        audio_data = np.array(
            h5py.File(os.path.join(file_folder, f'audio_{phase}.h5'),
                      'r')['d1'])
        vision_data = np.array(
            h5py.File(os.path.join(file_folder, f'video_{phase}.h5'),
                      'r')['d1'])
        labels = np.array(
            h5py.File(os.path.join(file_folder, f'ey_{phase}.h5'),
                      'r')['d1'])  # (N, 6)

        # Class order: Anger Disgust Fear Happy Sad Surprise
        labels = np.array(labels > 0, np.int32)

        this_dataset = MOSEI(list(range(len(labels))),
                             text_data,
                             audio_data,
                             vision_data,
                             labels,
                             zsl=zsl,
                             fsl=fsl)
    elif dataset == 'iemocap':
        data_path = os.path.join(
            file_folder, f'iemocap_data{"" if aligned else "_noalign"}.pkl')
        data = load(data_path)
        data = data[phase]

        # iemocap4 Distribution
        # neutral happy sad angry
        # [954    338   690 735]
        # [358    116   188 136]
        # [383    135   193 227]
        text_data = data['text']
        audio_data = data['audio']
        vision_data = data['vision']
        labels = data['labels']
        labels = np.argmax(labels, axis=-1)

        if zsl != -1:
            # iemocap9 Distribution
            # 0     1       2    3   4         5          6     7       8
            # Anger Excited Fear Sad Surprised Frustrated Happy Neutral Disgust
            # [735  686     19   690  65       1235       338   954     1] (Train)
            # [136  206     9    188  17       319        116   358     0] (Valid)
            # [227  141     12   193  25       278        135   383     1] (Test)
            iemocap9_text_data = load2(
                os.path.join(file_folder, f'text_{phase}.p'))
            iemocap9_audio_data = load2(
                os.path.join(file_folder, f'covarep_{phase}.p'))
            iemocap9_vision_data = load2(
                os.path.join(file_folder, f'facet_{phase}.p'))
            iemocap9_labels = load2(os.path.join(file_folder, f'y_{phase}.p'))
            iemocap9_labels = iemocap9_labels[:, 1:-1]
            iemocap9_labels = np.expand_dims(iemocap9_labels[:, zsl], axis=1)

            nonzeros = [
                i for i, l in enumerate(iemocap9_labels) if np.sum(l) != 0
            ]
            zsl_text_data = iemocap9_text_data[nonzeros]
            zsl_audio_data = iemocap9_audio_data[nonzeros]
            zsl_vision_data = iemocap9_vision_data[nonzeros]
            zsl_labels = iemocap9_labels[nonzeros]

            # Align seq len to 20
            zsl_text_data = zsl_text_data[:, :-1, :]
            zsl_audio_data = zsl_audio_data[:, :-1, :]
            zsl_vision_data = zsl_vision_data[:, :-1, :]

            text_data = np.concatenate((text_data, zsl_text_data), axis=0)
            audio_data = np.concatenate((audio_data, zsl_audio_data), axis=0)
            vision_data = np.concatenate((vision_data, zsl_vision_data),
                                         axis=0)

            labels = np.concatenate((labels, np.zeros((len(labels), 1))),
                                    axis=1)
            zsl_labels = np.concatenate((np.zeros(
                (len(zsl_labels), 4)), zsl_labels),
                                        axis=1)
            labels = np.concatenate((labels, zsl_labels), axis=0)
        this_dataset = IEMOCAP(list(range(len(labels))), text_data, audio_data,
                               vision_data, labels)
    else:
        raise ValueError('Wrong dataset!')

    if zsl == -1 and fsl == -1:
        save(this_dataset, processed_path)

    return this_dataset
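get_data() pairs load() with a save() call on the processed .pt path; a minimal sketch, assuming both are thin wrappers around torch serialization:

import torch

def save(obj, path):
    # Serialize the processed dataset object to disk.
    torch.save(obj, path)

def load(path):
    # Restore a previously saved dataset object.
    return torch.load(path)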
Example #17
    def load(self):
        utils.load(
            self.model,
            os.path.join(self.args.save,
                         'weights_{}.pt'.format(self.args.task_id)))
Example #18
def get_glove_emotion_embs(path):
    dataDict = load(path)
    return dataDict
Example #19
    def __init__(self,
                 config_file=None,
                 env_keys=None,
                 population=None,
                 model_builder=None,
                 max_generations=None,
                 max_evals=None,
                 max_reward=None,
                 max_episode_eval=None,
                 sigma=None,
                 truncation=None,
                 trials=None,
                 elite_trials=None,
                 n_elites=None,
                 env_selection=None,
                 sigma_strategy=None,
                 termination_strategy=None,
                 env_wrappers=None):

        self.config_file = config_file
        config = configparser.ConfigParser()
        default_config = Path(os.path.realpath(
            __file__)).parent.parent / 'config_files/config_default.cfg'
        read_ok = config.read(
            [default_config, config_file] if config_file else default_config)
        if len(read_ok) != 2:
            print("Warning: Failed to read all config files: " +
                  str([self.config_file, default_config]))

        # hyperparams
        self.env_keys = env_keys if env_keys is not None else json.loads(
            config.get('EnvironmentSettings', 'env_keys'))
        if not env_keys and config.get('EnvironmentSettings', 'env_key'):
            self.env_keys.append(config.get('EnvironmentSettings', 'env_key'))
        self.population = population if population is not None else int(
            config['HyperParameters']['population'])
        self.model_builder = model_builder or self._load_model_builder(config)
        self.max_episode_eval = max_episode_eval if max_episode_eval is not None else int(
            config['HyperParameters']['max_episode_eval'])
        self.max_evals = max_evals if max_evals is not None else int(
            config['HyperParameters']['max_evals'])
        self.max_reward = max_reward if max_reward is not None else float(
            config['HyperParameters']['max_reward'])
        self.max_generations = max_generations if max_generations is not None else int(
            config['HyperParameters']['max_generations'])
        self.sigma = sigma if sigma is not None else float(
            config['HyperParameters']['sigma'])
        self.truncation = truncation if truncation is not None else int(
            config['HyperParameters']['truncation'])
        self.trials = trials if trials is not None else int(
            config['HyperParameters']['trials'])
        self.elite_trials = elite_trials if elite_trials is not None else int(
            config['HyperParameters']['elite_trials'])
        self.n_elites = n_elites if n_elites is not None else int(
            config['HyperParameters']['n_elites'])

        # strategies
        if sigma_strategy and not isinstance(sigma_strategy, str):
            sigma_strategies["Custom"] = sigma_strategy
            sigma_strategy = "Custom"
        if env_selection and not isinstance(env_selection, str):
            env_selections["Custom"] = env_selection
            env_selection = "Custom"
        if termination_strategy and not isinstance(termination_strategy, str):
            termination_strategies["Custom"] = termination_strategy
            termination_strategy = "Custom"

        self.sigma_strategy_name = sigma_strategy or config['Strategies'][
            'sigma_strategy']
        self.env_selection_name = env_selection or config['Strategies'][
            'env_selection']
        self.termination_strategy_name = termination_strategy or config[
            'Strategies']['termination']

        self.sigma_strategy = sigma_strategies[self.sigma_strategy_name]
        self.env_selection = env_selections[self.env_selection_name]
        self.termination_strategy = termination_strategies[
            self.termination_strategy_name]

        # Environment wrappers
        self.env_wrappers = env_wrappers or config["EnvironmentSettings"].get(
            "env_wrappers") or []
        if isinstance(self.env_wrappers, str):
            self.env_wrappers = [
                utils.load(s) for s in json.loads(self.env_wrappers)
            ]

        def wrap(env, i=0):
            if len(self.env_wrappers) > i:
                return wrap(self.env_wrappers[i](env), i + 1)
            return env

        # Safeguard for max evaluations if not specified
        self.max_episode_eval = 1000000000 if self.max_episode_eval < 0 else self.max_episode_eval
        self.max_evals = 1000000000 if self.max_evals < 0 else self.max_evals

        # algorithm state
        self.g = 0
        self.envs = [wrap(gym.make(key)) for key in self.env_keys]
        self.evaluations_used = 0
        self.results = []
        self.active_env = 0
        self.scored_parents = None
        self.models = None
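The env_wrappers above are resolved by calling utils.load on strings from the config; a minimal sketch, assuming those strings are dotted import paths such as 'mypackage.wrappers.MyWrapper' (the exact format is an assumption):

import importlib

def load(dotted_name):
    # Split 'package.module.ClassName' and import the attribute dynamically.
    module_name, _, attr = dotted_name.rpartition('.')
    module = importlib.import_module(module_name)
    return getattr(module, attr)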
Example #20
    def train(self):
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            g_optim = tf.train.AdamOptimizer(self.flags.lr, beta1 = self.flags.beta1) \
                .minimize(self.g_loss, var_list = self.g_vars)
            e_optim = tf.train.AdamOptimizer(self.flags.lr, beta1 = self.flags.beta1) \
                .minimize(self.e_loss, var_list = self.e_vars)

            # g_optim = tf.train.AdagradOptimizer(self.flags.lr).minimize(self.g_loss, var_list = self.g_vars)
            # e_optim = tf.train.AdagradOptimizer(self.flags.lr).minimize(self.e_loss, var_list = self.e_vars)


        tf.global_variables_initializer().run()

        # merge summary
        sum_total = tf.summary.merge([self.im_sum, self.im_hat_sum, self.recon_im_sum,
            self.g_loss_sum, self.KL_fake_g_loss_sum,
            self.e_loss_sum,self.KL_fake_e_loss_sum, self.KL_real_e_loss_sum])

        if hasattr(self, 'match_x_e_loss_sum'):
            sum_total = tf.summary.merge([sum_total, self.match_x_e_loss_sum])

        if hasattr(self, 'match_z_e_loss_sum'):
            sum_total = tf.summary.merge([sum_total, self.match_z_e_loss_sum])

        if hasattr(self, 'match_z_g_loss_sum'):
            sum_total = tf.summary.merge([sum_total, self.match_z_g_loss_sum])

        if hasattr(self, 'match_x_g_loss_sum'):
            sum_total = tf.summary.merge([sum_total, self.match_x_g_loss_sum])


        writer = tf.summary.FileWriter("%s/ge_GAN_log_%s"%(self.flags.checkpoint_dir, self.flags.dataset_name), self.sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord = coord )

        could_load, checkpoint_counter = load(self.saver, self.sess, self.flags)
        if could_load:
            counter = checkpoint_counter
            print(" [*] Load SUCCESS")
        else:
            counter = 0
            print(" [!] Load failed...")

        for i in xrange(counter, self.flags.iter):
            i += 1
            for iter_e in range(self.updates['e']['num_updates']):
                if iter_e < (self.updates['e']['num_updates'] -1 ):
                    self.sess.run([e_optim])
                else:
                    # run with loss
                    # self.sess.run([e_optim])
                    _, e_loss_, KL_real_e_loss_, KL_fake_e_loss_ = self.sess.run(
                                        [e_optim, self.e_loss, self.KL_real_e_loss, self.KL_fake_e_loss])

            for iter_g in range(self.updates['g']['num_updates']):
                if iter_g < (self.updates['g']['num_updates'] -1):
                    self.sess.run([g_optim])
                else :
                    # run with summary and loss
                    # self.sess.run([g_optim])
                    _, sum_total_, g_loss_, KL_fake_g_loss_ = self.sess.run(
                                [g_optim, sum_total, self.g_loss, self.KL_fake_g_loss])

            if np.mod(i,20) == 0:
                writer.add_summary(sum_total_, i)
                print("iteration: [%2d], g_loss: %.8f, e_loss: %.8f" % (i, g_loss_, e_loss_))
                print ('fake/real_ e: ', KL_fake_e_loss_, '\\', KL_real_e_loss_)
                print ('fake_g: ', KL_fake_g_loss_)
                print('**************************')

            if np.mod(i,self.flags.save_iter) == 0 or i == self.flags.iter:
                # try to sample and save model
                [gt_im, recon_im] = self.sess.run([self.im, self.recon_im_test])
                save_images(self.flags, gt_im, i, 'GT')
                save_images(self.flags, recon_im, i, 'recon')

                save(self.saver, self.sess, self.flags, i)
                print ('saved once ...')
Example #21
    def train(self):
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            g_optim = tf.train.AdamOptimizer(self.flags.lr, beta1 = self.flags.beta1) \
                .minimize(self.g_loss, var_list = self.g_vars)
            e_optim = tf.train.AdamOptimizer(self.flags.lr, beta1 = self.flags.beta1) \
                .minimize(self.e_loss, var_list = self.e_vars)
            # d1_optim = tf.train.AdamOptimizer(self.flags.lr, beta1 = self.flags.beta1) \
            #     .minimize(self.d1_loss, var_list = self.d1_vars)
            # d2_optim = tf.train.AdamOptimizer(self.flags.lr, beta1 = self.flags.beta1) \
            #     .minimize(self.d2_loss, var_list = self.d2_vars)
            d1_optim = tf.train.RMSPropOptimizer(self.flags.lr).minimize(
                self.d1_loss, var_list=self.d1_vars)
            d2_optim = tf.train.RMSPropOptimizer(self.flags.lr).minimize(
                self.d2_loss, var_list=self.d2_vars)
        tf.global_variables_initializer().run()

        # merge summary
        # sum_total = tf.summary.merge([self.im_sum, self.im_hat_sum, self.recon_im_sum,
        #     self.g_loss_sum, self.KL_fake_g_loss_sum, self.d1_g_loss_sum, self.d2_im_hat_g_loss_sum,
        #     self.e_loss_sum,self.KL_fake_e_loss_sum, self.KL_real_e_loss_sum, self.d2_z_hat_e_loss_sum,
        #     self.d1_loss_sum, self.d2_loss_sum
        #     ])
        sum_total = tf.summary.merge([
            self.imh_sum, self.imh_hat_sum, self.g_loss_sum,
            self.d1_g_loss_sum, self.d2_imh_hat_g_loss_sum, self.e_loss_sum,
            self.d2_iml_hat_e_loss_sum, self.d1_loss_sum, self.d2_loss_sum
        ])

        writer = tf.summary.FileWriter(
            "%s/sr_GAN_log_%s" %
            (self.flags.checkpoint_dir, self.flags.dataset_name),
            self.sess.graph)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        could_load, checkpoint_counter = load(self.saver, self.sess,
                                              self.flags)
        if could_load:
            counter = checkpoint_counter
            print(" [*] Load SUCCESS")
        else:
            counter = 0
            print(" [!] Load failed...")

        for i in xrange(counter, self.flags.iter):
            i += 1

            _, _, d1_loss_, d2_loss_ = self.sess.run(
                [d1_optim, d2_optim, self.d1_loss, self.d2_loss])

            for iter_e in range(self.updates['e']['num_updates']):
                if iter_e < (self.updates['e']['num_updates'] - 1):
                    self.sess.run([e_optim])
                else:
                    # run with loss
                    _, e_loss_ = self.sess.run([e_optim, self.e_loss])

            for iter_g in range(self.updates['g']['num_updates']):
                if iter_g < (self.updates['g']['num_updates'] - 1):
                    self.sess.run([g_optim])
                else:
                    # run with summary and loss
                    _, sum_total_, g_loss_, = self.sess.run(
                        [g_optim, sum_total, self.g_loss])

            if np.mod(i, 10) == 0:
                writer.add_summary(sum_total_, i)
                print("iteration: [%2d], g_loss: %.8f, e_loss: %.8f, d1_loss: %.8f, d2_loss: %.8f" \
                        % (i, g_loss_, e_loss_, d1_loss_, d2_loss_ ))
                print('**************************')

            if np.mod(i, self.flags.save_iter) == 0 or i == self.flags.iter:
                # try to sample and save model
                [low_im, high_im, fakehigh_im
                 ] = self.sess.run([self.iml, self.imh, self.imh_hat])
                save_images(self.flags, low_im, i, 'iml')
                save_images(self.flags, high_im, i, 'imh')
                save_images(self.flags, fakehigh_im, i, 'imh_hat')

                save(self.saver, self.sess, self.flags, i)
                print('saved once ...')
Example #22
def main(arguments=None):
    """The main function
    Entry point.
    """
    global loss_func
    global best_acc
    best_acc = 0
    global args

    # Setting the hyper parameters
    parser = argparse.ArgumentParser(description='Example of Capsule Network')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        help='number of training epochs. default=10')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate. default=0.001')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        help='training batch size. default=128')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=128,
                        help='testing batch size. default=128')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        help=
        'how many batches to wait before logging training status. default=10')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training. default=false')
    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='select the gpu.  default=cuda:0')
    parser.add_argument(
        '--threads',
        type=int,
        default=4,
        help='number of threads for data loader to use. default=4')
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help='random seed for training. default=42')
    parser.add_argument(
        '--num_conv_in_channels',
        type=int,
        default=1,
        help='number of channels in input to first Conv Layer.  default=1')
    parser.add_argument(
        '--num_conv_out_channels',
        type=int,
        default=256,
        help='number of channels in output from first Conv Layer.  default=256'
    )
    parser.add_argument('--conv-kernel',
                        type=int,
                        default=9,
                        help='kernel size of Conv Layer.  default=9')
    parser.add_argument('--conv-stride',
                        type=int,
                        default=1,
                        help='stride of first Conv Layer.  default=1')
    parser.add_argument(
        '--num-primary-channels',
        type=int,
        default=32,
        help='channels produced by PrimaryCaps layer.  default=32')
    parser.add_argument(
        '--primary-caps-dim',
        type=int,
        default=8,
        help='dimension of capsules in PrimaryCaps layer.  default=8')
    parser.add_argument(
        '--primary-kernel',
        type=int,
        default=9,
        help='kernel dimension for PrimaryCaps layer.  default=9')
    parser.add_argument('--primary-stride',
                        type=int,
                        default=2,
                        help='stride for PrimaryCaps layer.  default=2')
    parser.add_argument('--num-classes',
                        type=int,
                        default=10,
                        help='number of output classes.  default=10 for MNIST')
    parser.add_argument(
        '--digit-caps-dim',
        type=int,
        default=16,
        help='dimension of capsules in DigitCaps layer. default=16')
    parser.add_argument(
        '--dec1-dim',
        type=int,
        default=512,
        help='output dimension of first layer in decoder.  default=512')
    parser.add_argument(
        '--dec2-dim',
        type=int,
        default=1024,
        help='output dimension of second layer in decoder.  default=1024')
    parser.add_argument('--num-routing',
                        type=int,
                        default=3,
                        help='number of routing iteration. default=3')
    parser.add_argument(
        '--use-reconstruction-loss',
        type=utils.str2bool,
        nargs='?',
        default=True,
        help='use an additional reconstruction loss. default=True')
    parser.add_argument(
        '--regularization-scale',
        type=float,
        default=0.0005,
        help=
        'regularization coefficient for reconstruction loss. default=0.0005')
    parser.add_argument('--dataset',
                        help='the name of dataset (mnist, cifar10)',
                        default='mnist')
    parser.add_argument(
        '--input-width',
        type=int,
        default=28,
        help='input image width to the convolution. default=28 for MNIST')
    parser.add_argument(
        '--input-height',
        type=int,
        default=28,
        help='input image height to the convolution. default=28 for MNIST')
    parser.add_argument('--directory',
                        type=str,
                        default=PROJECT_DIR / 'results',
                        help='directory to store results')
    parser.add_argument('--data-directory',
                        type=str,
                        default=PROJECT_DIR / 'data',
                        help='directory to store data')
    parser.add_argument('--description',
                        type=str,
                        default='no description',
                        help='description to store together with results')
    parser.add_argument('--exp-decay-lr',
                        action='store_true',
                        default=False,
                        help='use exponential decay of learning rate')
    parser.add_argument(
        '--decay-steps',
        type=int,
        default=4000,
        help=
        'decay steps for exponential learning rate adjustment.  default=4000'
    )
    parser.add_argument(
        '--decay-rate',
        type=float,
        default=0.96,
        help=
        'decay rate for exponential learning rate adjustment.  default=0.96'
    )
    parser.add_argument('--staircase',
                        action='store_true',
                        default=False,
                        help='activate staircase for learning rate adjustment')
    # one cycle policy
    parser.add_argument('--one-cycle-policy',
                        action='store_true',
                        default=False,
                        help='use one cycle policy for learning rate')
    # warm restarts
    parser.add_argument('--warm-restarts',
                        action='store_true',
                        default=False,
                        help='use warm restarts of the learning rate')
    parser.add_argument(
        '--Ti',
        type=float,
        default=10.0,
        help='number of epochs of a cycle of the warm restarts')
    parser.add_argument('--Tmult',
                        type=float,
                        default=1.0,
                        help='multiplier factor for the warm restarts')
    # adaptive batch size
    parser.add_argument('--adabatch',
                        action='store_true',
                        default=False,
                        help='activate adabatch.  default False')
    parser.add_argument('--adapow',
                        type=int,
                        default=2,
                        help='power of two for adabatch size')
    # weight sharing
    parser.add_argument('--conv-shared-weights', type=int, default=0)
    parser.add_argument('--primary-shared-weights', type=int, default=0)
    parser.add_argument('--digit-shared-weights', type=int, default=0)
    parser.add_argument('--conv-shared-bias', type=int, default=0)
    # small decoder
    parser.add_argument(
        '--small-decoder',
        action='store_true',
        default=False,
        help='enables the small decoder instead of the standard one')
    # restart option
    parser.add_argument('--restart-training',
                        action='store_true',
                        default=False)
    # squash approx
    parser.add_argument('--squash-approx', action='store_true', default=False)

    # find best learning rate interval
    parser.add_argument('--find-lr',
                        action='store_true',
                        default=False,
                        help='train to find the best learning rate')

    # normalize or not the inputs to the net (not normalized is better)
    parser.add_argument('--normalize-input',
                        action='store_true',
                        default=False,
                        help='enables normalization and disables random '
                        'cropping of the inputs with padding 2')

    # use new / old version of the model
    parser.add_argument('--old-model',
                        action='store_true',
                        default=False,
                        help='uses old model')

    args = parser.parse_args(args=arguments)

    args.directory = pathlib.Path(args.directory)

    print(args)

    if args.old_model:
        from src.model.model import Net
        import src.model.functions as func
        ModelToUse = Net

        def loss_func(output, target, regularization_scale, reconstruction,
                      data, device, batch_size):
            return func.loss(output, reconstruction, target, data,
                             regularization_scale, device)
    else:
        from src.model.layers import CapsNet
        from src.model.layers import loss_func as loss_func_internal
        ModelToUse = CapsNet

        def loss_func(output, target, regularization_scale, reconstruction,
                      data, device, batch_size):
            return loss_func_internal(output, target,
                                      regularization_scale, reconstruction,
                                      data.view(batch_size, -1), device)

    # Check GPU or CUDA is available
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    if not args.cuda:
        args.device = 'cpu'

    # Get reproducible results by manually seed the random number generator
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # Load data
    train_loader, test_loader = utils.load_data(args)

    if args.adabatch:
        temp_bs = args.batch_size

        args.batch_size = 2**(args.adapow)
        train_loader1, _ = utils.load_data(args)

        # Presumably each AdaBatch stage doubles the batch size, matching the
        # 2**(adapow + 1) / 2**(adapow + 2) values assigned during training below.
        args.batch_size = 2**(args.adapow + 1)
        train_loader2, _ = utils.load_data(args)

        args.batch_size = 2**(args.adapow + 2)
        train_loader3, _ = utils.load_data(args)

        args.batch_size = temp_bs

    # Build Capsule Network
    print('===> Building model')
    model = ModelToUse(input_wh=args.input_width,
                       num_conv_in_channels=args.num_conv_in_channels,
                       num_conv_out_channels=args.num_conv_out_channels,
                       conv_kernel=args.conv_kernel,
                       conv_stride=args.conv_stride,
                       num_primary_channels=args.num_primary_channels,
                       primary_caps_dim=args.primary_caps_dim,
                       primary_kernel=args.primary_kernel,
                       primary_stride=args.primary_stride,
                       num_classes=args.num_classes,
                       digit_caps_dim=args.digit_caps_dim,
                       iter=args.num_routing,
                       dec1_dim=args.dec1_dim,
                       dec2_dim=args.dec2_dim,
                       cuda_enabled=args.cuda,
                       device=args.device,
                       regularization_scale=args.regularization_scale,
                       conv_shared_weights=args.conv_shared_weights,
                       primary_shared_weights=args.primary_shared_weights,
                       digit_shared_weights=args.digit_shared_weights,
                       conv_shared_bias=args.conv_shared_bias,
                       small_decoder=args.small_decoder,
                       squash_approx=args.squash_approx)
    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr)
    lr_wr = utils.custom_warm_restarts(args.lr, args.lr * 10)
    starting_epoch = 1

    if args.cuda:
        print('Utilize GPUs for computation')
        print('Number of GPU available', torch.cuda.device_count())
        model.to(args.device)
        cudnn.benchmark = True
        model = torch.nn.DataParallel(model)

    args.file_flag = 'w'
    if args.restart_training:
        args.file_flag = 'a'  # presumably append when resuming, so earlier results are kept
        p = pathlib.Path(args.directory) / 'trained_model'
        if p.exists():
            l = sorted(list(p.iterdir()))
            if l:
                f = l[-1]
                pckl = utils.load(str(f))
                model.load_state_dict(pckl['model_state_dict'])
                optimizer.load_state_dict(pckl['optimizer_state_dict'])
                lr_wr.__dict__ = pckl['lr_wr']
                starting_epoch = pckl['epoch']

    # Print the model architecture and parameters
    print('Model architectures:\n{}\n'.format(model))

    print('Parameters and size:')
    for name, param in model.named_parameters():
        print('{}: {}'.format(name, list(param.size())))

    # CapsNet has:
    # - 8.2M parameters and 6.8M parameters without the reconstruction subnet on MNIST.
    # - 11.8M parameters and 8.0M parameters without the reconstruction subnet on CIFAR10.
    num_params = sum([param.nelement() for param in model.parameters()])

    # The coupling coefficients c_ij are not included in the parameter list,
    # we need to add them manually, which is 1152 * 10 = 11520 (on MNIST) or 2048 * 10 (on CIFAR10)
    print('\nTotal number of parameters: {}\n'.format(num_params + (
        11520 if args.dataset in ('mnist', 'fashionmnist') else 20480)))

    # Make model checkpoint directory
    if not (args.directory / 'trained_model').is_dir():
        (args.directory / 'trained_model').mkdir(parents=True, exist_ok=True)

    # files to store accuracies and losses
    train_mloss = args.directory / 'train_margin_loss.txt'
    train_rloss = args.directory / 'train_reconstruction_loss.txt'
    train_acc = args.directory / 'train_accuracy.txt'

    test_mloss = args.directory / 'test_margin_loss.txt'
    test_rloss = args.directory / 'test_reconstruction_loss.txt'
    test_acc = args.directory / 'test_accuracy.txt'

    learning_rate = args.directory / 'learning_rate.txt'
    output_tensor = args.directory / 'output_tensor.txt'

    n_parameters = args.directory / 'n_parameters.txt'
    with open(n_parameters, args.file_flag) as f:
        f.write('{}\n'.format(num_params +
                              (11520 if args.dataset == 'mnist' else 20480)))

    arguments_file = args.directory / 'arguments.txt'
    with open(arguments_file, args.file_flag) as f:
        pprint.pprint(args.__dict__, stream=f)

    description = args.directory / 'details.txt'
    description = open(description, args.file_flag)
    description.write(args.description)
    description.close()

    train_mloss = open(train_mloss, args.file_flag)
    train_rloss = open(train_rloss, args.file_flag)
    train_acc = open(train_acc, args.file_flag)
    test_mloss = open(test_mloss, args.file_flag)
    test_rloss = open(test_rloss, args.file_flag)
    test_acc = open(test_acc, args.file_flag)
    learning_rate = open(learning_rate, args.file_flag)
    output_tensor = open(output_tensor, args.file_flag)

    utils.dump(utils.make_dataset_obj(locals(), globals()),
               args.directory / 'trained_model' / 'dataset')

    # Train and test
    try:
        for epoch in range(starting_epoch, args.epochs + 1):

            if not args.adabatch:
                train(model, train_loader, optimizer, epoch, train_mloss,
                      train_rloss, train_acc, learning_rate, lr_wr,
                      output_tensor)
                test(model, test_loader, len(train_loader), epoch, test_mloss,
                     test_rloss, test_acc, args.directory)
            else:
                if (1 <= epoch <= 3):
                    train(model, train_loader, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)
                elif (4 <= epoch <= 33):
                    args.batch_size = 2**(args.adapow)
                    train(model, train_loader1, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)
                elif (34 <= epoch <= 63):
                    args.batch_size = 2**(args.adapow + 1)
                    train(model, train_loader2, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)
                else:
                    args.batch_size = 2**(args.adapow + 2)
                    train(model, train_loader3, optimizer, epoch, train_mloss,
                          train_rloss, train_acc, learning_rate, lr_wr,
                          output_tensor)
                    test(model, test_loader, len(train_loader), epoch,
                         test_mloss, test_rloss, test_acc, args.directory)
            train_mloss.flush()
            train_rloss.flush()
            train_acc.flush()
            test_mloss.flush()
            test_rloss.flush()
            test_acc.flush()
            learning_rate.flush()
            output_tensor.flush()

            # Save model checkpoint
            utils.checkpoint(utils.make_partial_checkpoint_obj(
                locals(), globals()),
                             epoch,
                             directory=args.directory)
    except KeyboardInterrupt:
        print("\n\n\nKeyboardInterrupt, stopping training...")

    train_mloss.close()
    train_rloss.close()
    train_acc.close()
    test_mloss.close()
    test_rloss.close()
    test_acc.close()
    learning_rate.close()
    output_tensor.close()
    with open(args.directory / 'best_accuracy.txt', args.file_flag) as f:
        f.write("%.10f,%d\n" % (best_acc, best_acc_epoch))
    print('\n\nBest Accuracy: {}%\nReached at epoch: {}\n\n'.format(
        best_acc, best_acc_epoch))

    global avg_training_time_per_epoch
    global avg_testing_time_per_epoch

    with open(args.directory / 'average_training_time_per_epoch.txt',
              args.file_flag) as f:
        f.write("%.10f\n" % avg_training_time_per_epoch)
    print('Average time per training epoch: %.10f\n\n' %
          avg_training_time_per_epoch)
    with open(args.directory / 'average_testing_time_per_epoch.txt',
              args.file_flag) as f:
        f.write("%.10f\n" % avg_testing_time_per_epoch)
    print('Average time per testing epoch: %.10f\n\n' %
          avg_testing_time_per_epoch)
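# --- Sketch (not part of the original script): the AdaBatch branch above cycles
# --- through train_loader1/2/3, which are built elsewhere. Assuming they are
# --- ordinary PyTorch DataLoaders over the same training set, one way to create
# --- them is shown below; `make_adabatch_loaders` and `train_dataset` are
# --- illustrative names, not taken from the original code.
from torch.utils.data import DataLoader

def make_adabatch_loaders(train_dataset, adapow, num_workers=4):
    """Return three loaders whose batch size doubles at each AdaBatch stage."""
    # Stage 0 covers epochs 4-33, stage 1 epochs 34-63, stage 2 the rest,
    # matching the 2**adapow, 2**(adapow+1), 2**(adapow+2) batch sizes above.
    return [DataLoader(train_dataset,
                       batch_size=2 ** (adapow + stage),
                       shuffle=True,
                       num_workers=num_workers)
            for stage in range(3)]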
Beispiel #23
0
def plot_manymore(exps,
                  metric='accuracy',
                  measure="mean",
                  info=None,
                  save=False):
    # Configs
    _metric = metric
    metric, measure = verify_metrics(metric, measure)
    xlabel = 'Rounds'
    ylabel = f'{measure.capitalize()} {_metric.capitalize()}'
    title = f'{_metric.capitalize()} vs. No. of rounds'
    if info is not None:
        xlabel = info.get('xlabel', xlabel)
        ylabel = info.get('ylabel', ylabel)
        title = info.get('title', title)
    plt.ylabel(ylabel, fontsize=13)
    plt.xlabel(xlabel, fontsize=13)
    colors = [
        'black', 'green', 'orange', 'blue', 'red', 'grey', 'tan', 'pink',
        'navy', 'aqua'
    ]
    line_styles = ['-', '--', '-.', '-', '--', '-.', ':', '-', '--', '-.', ':']
    plt.grid(linestyle='dashed')
    plt.rc('legend', fontsize=12)
    plt.xticks(fontsize=13)
    plt.yticks(fontsize=13)
    std_data = None
    for i, exp in enumerate(exps):
        # Data
        logs = load(exp['file'])
        name = exp.get('name', "")
        if measure == "mean":
            data = np.mean([[v[metric] for v in lo] for lo in logs.values()],
                           axis=0)
        elif measure == "mean-std":
            data = np.mean([[v[metric] for v in lo] for lo in logs.values()],
                           axis=0)
            std_data = np.std([[v[metric] for v in lo]
                               for lo in logs.values()],
                              axis=0)
        elif measure == "max":
            data = np.max([[v[metric] for v in lo] for lo in logs.values()],
                          axis=0)
        else:
            data = np.std([[v[metric] for v in lo] for lo in logs.values()],
                          axis=0)
        x = range(0, len(data) * EVAL_ROUND, EVAL_ROUND)
        plt.plot(x,
                 data,
                 color=colors[i],
                 label=name,
                 linestyle=line_styles[i])
        if std_data is not None:
            plt.fill_between(x,
                             data - std_data,
                             data + std_data,
                             color=colors[i],
                             alpha=.05)

    plt.legend(loc="lower right", shadow=True)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # plt.yticks(np.arange(0, 1.1, 0.2))
    plt.title(title)
    if save:
        unique = np.random.randint(100, 999)
        plt.savefig(f"../out/EXP_{unique}.pdf")
    plt.show()
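# --- Usage sketch (not part of the original source): plot_manymore expects
# --- `exps` to be a list of dicts with a pickled-log 'file' path and an
# --- optional legend 'name'; the file names below are placeholders.
if __name__ == '__main__':
    experiments = [
        {'file': "logs_run_a.pkl", 'name': "Run A"},
        {'file': "logs_run_b.pkl", 'name': "Run B"},
    ]
    plot_manymore(experiments, metric='accuracy', measure="mean-std",
                  info={'xlabel': 'Rounds', 'ylabel': 'Mean Accuracy'},
                  save=False)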
Beispiel #24
0
import argparse
import src.visualization.visualize as viz
import src.graph_analysis as ga

from src.utils import load
from src.utils import load_graphml_format
from src.predict_days import load_dataset
from src.predict_days import predict_days
from src.utils import GRAPHML_FORMAT
from src.utils import PAJEK_FORMAT
from src.utils import DATASET_PATH


if __name__ == "__main__":
    reuters_graphml = load_graphml_format(GRAPHML_FORMAT)
    reuters_pajek = load(PAJEK_FORMAT)
    parser = argparse.ArgumentParser(description="Initializing graph analysis")
    parser.add_argument('-vg',
                        '--visualize-graph',
                        help="Visualize graph and save it to /figures folder")
    parser.add_argument('-ddd',
                        '--draw-degree-distribution',
                        help="Visualize degree distribution")
    parser.add_argument('-bc',
                        '--betweenness-centrality',
                        help="Visualize nodes with highest centrality value")
    parser.add_argument('-cc',
                        '--closeness-centrality',
                        help="Visualize nodes with highest centrality value")
    parser.add_argument('-vc',
                        '--visualize-communities',
Beispiel #25
0
    parser.add_argument('-n',
                        '--name',
                        default='model',
                        type=str,
                        help='Name of the model.')

    args = parser.parse_args()

    trained_model = os.path.join(BASE_PATH, 'models/{}.hdf5'.format(args.name))

    model = MoleculeCVAE(gpu_mode=args.gpu)
    model.load(CHARSET, trained_model, latent_rep_size=LATENT_DIM)

    signatures, smiles = load(os.path.join(BASE_PATH, 'data/inchikeys_B4.npy'),
                              os.path.join(BASE_PATH,
                                           'data/signature_B4_matrix.npy'),
                              os.path.join(BASE_PATH, 'data/key2inch_B4.csv'),
                              calc_smiles=False)

    m = args.num_molecules
    n = args.num_reconstructions

    reconstructed_smiles = sim_reconstruction(model,
                                              np.array(smiles[-m:]),
                                              np.array(signatures[-m:]),
                                              latent_dim=LATENT_DIM,
                                              su=SignatureUtils(signatures),
                                              m=m,
                                              n=n,
                                              stds=[0, 0.05, 0.1],
                                              fix=args.fix)