Example #1
    def run_collapse(self, hosts, cmd):
        """Run cmd on every host in parallel over ssh and group hosts by identical output."""
        progress = None
        if self.progressbar:
            from progressbar import ProgressBar, Percentage, Bar, ETA, FileTransferSpeed
            progress = ProgressBar(widgets=[
                "Running: ",
                Percentage(), ' ',
                Bar(marker='.'), ' ',
                ETA(), ' ',
                FileTransferSpeed()
            ],
                                   maxval=len(hosts))

        codes = {"total": 0, "error": 0, "success": 0}
        outputs = defaultdict(list)

        def worker(host, cmd):
            p = Popen(self.get_parallel_ssh_options(host, cmd),
                      stdout=PIPE,
                      stderr=PIPE)
            o = ""
            while True:
                outs, _, _ = select([p.stdout, p.stderr], [], [])
                outline = errline = ""
                if p.stdout in outs:
                    outline = p.stdout.readline()
                if p.stderr in outs:
                    errline = p.stderr.readline()
                o += outline + errline

                if outline == "" and errline == "" and p.poll() is not None:
                    break

            if o == "":
                o = colored("[ No Output ]\n", "yellow")
            outputs[o].append(host)
            if p.poll() == 0:
                codes["success"] += 1
            else:
                codes["error"] += 1
            codes["total"] += 1
            if self.progressbar:
                progress.update(codes["total"])

        pool = Pool(self.ssh_threads)
        if self.progressbar:
            progress.start()
        for host in hosts:
            pool.start(Greenlet(worker, host, cmd))

        try:
            pool.join()
        except KeyboardInterrupt:
            pass

        if self.progressbar:
            progress.finish()
        self.print_exec_results(codes)
        print()
        for output, hosts in outputs.items():
            msg = " %s    " % ','.join(hosts)
            table_width = min([len(msg) + 2, terminal_size()[0]])
            cprint("=" * table_width, "blue", attrs=["bold"])
            cprint(msg, "blue", attrs=["bold"])
            cprint("=" * table_width, "blue", attrs=["bold"])
            print(output)
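All of these snippets share the same progressbar lifecycle that Example #1 guards behind self.progressbar: build a ProgressBar with a widget list and a maxval, call start(), call update(n) as items complete, and call finish() at the end. A minimal, standalone sketch of that pattern (the workload below is a placeholder):

# Minimal sketch of the start/update/finish lifecycle; the sleep stands in for real work.
import time
from progressbar import ProgressBar, Percentage, Bar, ETA

items = range(100)
progress = ProgressBar(widgets=[Percentage(), ' ', Bar(marker='.'), ' ', ETA()],
                       maxval=len(items))
progress.start()
for n, _ in enumerate(items, 1):
    time.sleep(0.01)    # stand-in for real work
    progress.update(n)  # advance by the number of completed items
progress.finish()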
Example #2
def NETSGraph(results, NETS_edges, node_labeler, node_type, edge_labeler):
    '''
    Function takes a json file of query results, a list of NETS edges, node and edge metadata dictionaries, and a
    dictionary containing NETS edge information by BIO node. Using these items the function creates the directed
    OWL-NETS abstraction network. Node metadata includes: labels (a list of human readable labels); id (the endpoint
    database identifiers); and bio (the NETS node type). Edge metadata includes: labels (human readable label for the
    edge between two NETS nodes) and id (the ontology concept term used to link the NETS nodes).
    :param results: json file containing the query results from endpoint
    :param NETS_edges: list of lists, where each list is a NETS edge and the order specifies a directional relationship
    :param node_labeler: node metadata nested lists (list[0] contains the NETS nodes label triples, list[1] contains
    the NETS nodes identifier triples)
    :param node_type: dictionary with BIO node as key and set of NETS node types as value
    :param edge_labeler: dictionary where the keys are the NETS edges and the values are the edge labels
    :return: OWL-NETS directed graph
    '''
    print 'Started building OWL-NETS graph'

    # initialize progress bar
    widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
    pbar = ProgressBar(widgets=widgets, maxval=len(results['results']['bindings']))

    NETS_graph = nx.DiGraph()

    for res in pbar(results['results']['bindings']):
        for edge in NETS_edges:

            i = res[str(edge[0].strip('?').encode('utf8'))]['value'].encode('utf8')
            j = res[str(edge[1].strip('?').encode('utf8'))]['value'].encode('utf8')

            # set nodes
            NETS_graph.add_node(min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                                labels=node_labeler[edge[0].strip('?')][i]['label'],
                                id=node_labeler[edge[0].strip('?')][i]['id'],
                                bio=i,
                                type='-'.join(list(node_type[i])))

            # gets second node in edge
            NETS_graph.add_node(min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                                labels=node_labeler[edge[1].strip('?')][j]['label'],
                                id=node_labeler[edge[1].strip('?')][j]['id'],
                                bio=j,
                                type='-'.join(list(node_type[j])))
            # add edge
            NETS_graph.add_edge(min(node_labeler[edge[0].strip('?')][i]['label'], key=len),
                                min(node_labeler[edge[1].strip('?')][j]['label'], key=len),
                                labels=res[(edge_labeler[tuple(edge)]['label']).strip('?')]['value'].encode('utf8'),
                                id=(edge_labeler[tuple(edge)]['id']).strip('?'),
                                edge='-'.join([edge[0].strip('?'), edge[1].strip('?')]))


    # close the progress bar
    pbar.finish()
    print 'Finished building OWL-NETS graph'
    print '\n'

    # print information about graph
    print 'Directed OWL-NETS Graph has ' + str(len(NETS_graph.nodes())) + ' nodes, ' + str(
        len(NETS_graph.edges())) + ' edges, and ' + str(
        nx.number_connected_components(NETS_graph.to_undirected())) + ' connected component(s)'

    return NETS_graph
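Instead of calling update() by hand, NETSGraph drives the bar by wrapping the iterable: for res in pbar(results['results']['bindings']). A minimal sketch of that wrapping pattern with placeholder rows:

# Minimal sketch of wrapping an iterable with a ProgressBar, as NETSGraph does;
# the rows below are placeholders for the SPARQL result bindings.
from progressbar import ProgressBar, Percentage, Bar, FormatLabel

rows = list(range(50))
pbar = ProgressBar(widgets=[Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')],
                   maxval=len(rows))
for row in pbar(rows):  # the bar advances once per yielded item
    pass                # stand-in for the per-row graph construction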
Example #3
    def _do_distribution(self, data, spectrograms, name):

        pbar = None
        try:
            from progressbar import ProgressBar, Percentage, Bar
            pbar = ProgressBar(widgets=[Percentage(), Bar()],
                               maxval=len(data.track_names))
            pbar.start()
        except Exception:
            pass

        if config.learn_phase:
            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            h5f_path = os.path.join(self.analysisPath,
                                    "distribution_ir_%s.hdf5" % name)
            h5file = h5py.File(h5f_path, "w")
            h5real = h5file.create_group("real")
            h5imag = h5file.create_group("imag")

            plt.figure(figsize=(15, 15))
            plt.suptitle(name)
            ax1 = plt.subplot(231)
            bins = self._do_distribution_plot(pbar, h5real, data, spectrograms,
                                              None, "upper", "real")

            plt.subplot(232, sharey=ax1, sharex=ax1)
            self._do_distribution_plot(pbar, h5real, data, spectrograms,
                                       bins, "center", "real")

            plt.subplot(233, sharey=ax1, sharex=ax1)
            self._do_distribution_plot(pbar, h5real, data, spectrograms,
                                       bins, "lower", "real")

            ax1 = plt.subplot(234)
            bins = self._do_distribution_plot(pbar, h5imag, data, spectrograms,
                                              None, "upper", "imag")

            plt.subplot(235, sharey=ax1, sharex=ax1)
            self._do_distribution_plot(pbar, h5imag, data, spectrograms,
                                       bins, "center", "imag")

            plt.subplot(236, sharey=ax1, sharex=ax1)
            self._do_distribution_plot(pbar, h5imag, data, spectrograms,
                                       bins, "lower", "imag")

            h5file.close()

            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            plt.savefig(os.path.join(self.analysisPath,
                                     "distribution_%s_ir.png" % name))
            plt.close()
        else:
            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            h5f_path = os.path.join(self.analysisPath,
                                    "distribution_amplitude_%s.hdf5" % name)
            h5file = h5py.File(h5f_path, "w")

            plt.figure(figsize=(15, 15))
            plt.suptitle(name)
            ax1 = plt.subplot(131)
            bins = self._do_distribution_plot(pbar, h5file, data, spectrograms,
                                              None, "upper")

            plt.subplot(132, sharey=ax1, sharex=ax1)
            self._do_distribution_plot(pbar, h5file, data, spectrograms,
                                       bins, "center")

            plt.subplot(133, sharey=ax1, sharex=ax1)
            self._do_distribution_plot(pbar, h5file, data, spectrograms,
                                       bins, "lower")
            h5file.close()

            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            plt.savefig(os.path.join(self.analysisPath,
                                     "distribution_%s_amplitude.png" % name))
            plt.close()
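Example #3 treats the bar as optional: the import and construction sit in a try/except, so pbar stays None when the progressbar package is unavailable and the plotting helper has to guard each update. A minimal sketch of that pattern with made-up helper names:

# Minimal sketch of the optional-progressbar pattern; make_optional_bar and advance
# are hypothetical helpers, not part of the original code.
def make_optional_bar(total):
    try:
        from progressbar import ProgressBar, Percentage, Bar
        bar = ProgressBar(widgets=[Percentage(), Bar()], maxval=total)
        bar.start()
        return bar
    except Exception:
        return None

def advance(bar, count):
    if bar is not None:
        bar.update(count)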
Example #4
    triggers = zapi.trigger.get(
        output=['description', 'triggerid'],
        hostids=[h[0]['hostid']],
        expandDescription=1,
        search={'description': ': {0}'.format(host['trigger'])})
    logger.info('Found {0} triggers for host {1}'.format(
        len(triggers), host['host']))
    logger.print_json(triggers)
    for t in triggers:
        maintenance_triggers_ids.append(t['triggerid'])

i = 0
logger.info('Found {0} triggers'.format(len(maintenance_triggers_ids)))
bar = ProgressBar(
    maxval=len(maintenance_triggers_ids),
    widgets=[Percentage(),
             ReverseBar(),
             ETA(),
             RotatingMarker(),
             Timer()]).start()
for t in maintenance_triggers_ids:
    if args.run == True:
        out = zapi.trigger.update(triggerid=t, status=args.status)
        i += 1
        bar.update(i)
    else:
        logger.warning('Should change triggerid {0} to status {1}'.format(
            t, args.status))
bar.finish()

zapi.user.logout()
Example #5
tempid = np.zeros(n)
dist = np.zeros(n)

# construct the KDTree from the centroid nodes
print('Constructing KDTree object from centroid nodes ...')
source = np.column_stack((centroid_x, centroid_y))
tree = spatial.cKDTree(source)

# used for FEM shape function
ones = np.ones(3)

# the list that stores the triangle polygon for a particular TIN element
poly = list()

# for the progress bar
w = [Percentage(), Bar(), ETA()]
pbar = ProgressBar(widgets=w, maxval=n).start()

print('Searching using KDTree ...')
for i in range(len(x)):  # loop over every node
    d, idx = tree.query((x[i], y[i]), k=neigh)

    # instead of specifying number of neighbours, specify search radius
    #idx = tree.query_ball_point( (m_x[i],m_y[i]), neigh)

    # reconstruct a poly out of the tin element for each idx
    not_found = 0
    for j in range(len(idx)):

        # find the area of each triangle in the search space
        x1 = t_x[t_ikle[idx[j], 0]]
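Example #5 is truncated mid-loop, but the KDTree lookup it demonstrates is self-contained. A minimal sketch of the same scipy.spatial.cKDTree query with made-up points:

# Minimal sketch of the cKDTree nearest-neighbour query from Example #5;
# the centroid coordinates and the query point are made up.
import numpy as np
from scipy import spatial

centroid_x = np.random.rand(100)
centroid_y = np.random.rand(100)
source = np.column_stack((centroid_x, centroid_y))
tree = spatial.cKDTree(source)

neigh = 3
d, idx = tree.query((0.5, 0.5), k=neigh)  # distances and indices of the 3 nearest centroids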
Example #6
    predict_span = 50

    grid_circ = 7
    data_dir = "../data/h5_test/{}_{}".format(start_string, end_string)

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    aq_count = 0

    print("\nFetching data to export...")
    for aq_name in aq_location.keys():
        aggregate = 0

        ti.sleep(0.1)
        bar = PB(initial_value=0, maxval=delta_time + 1,
                 widgets=[aq_name, ' ', Bar('=', '[', ']'), ' ', Percentage()])

        valid_count = 0
        near_grids, grid_coor_array = get_grids(aq_name, grid_circ)

        # Validate the near grid matrix algorithm
        # plt.figure()
        # plt.title(aq_name)
        # plt.plot(aq_location[aq_name][0], aq_location[aq_name][1], '.')
        # plt.plot(grid_coor_array[:, 0], grid_coor_array[:, 1], '.')
        # plt.show()

        # Exporting data from start to end
        grid_matrix = []
        history_matrix = []
        predict_matrix = []
Example #7
def pleiopred_genomewide(data_file_D1,
                         data_file_D2,
                         alpha,
                         Pi,
                         init_betas_prefix,
                         ld_radius=None,
                         ld_dict=None,
                         out_file_prefix=None,
                         n1=None,
                         n2=None,
                         PRF=None,
                         num_iter=60,
                         burn_in=10,
                         zero_jump_prob=0.05,
                         user_h1=None,
                         user_h2=None):
    """
    Calculate LDpred for a genome
    """
    prf_chr = PRF['chrom']
    prf_sids = PRF['sids']
    h2_D1 = PRF['h2_D1']
    h2_D2 = PRF['h2_D2']

    df1 = h5py.File(data_file_D1, 'r')
    df2 = h5py.File(data_file_D2, 'r')
    cord_data_g1 = df1['cord_data']
    cord_data_g2 = df2['cord_data']

    has_phenotypes1 = False
    if 'y' in df1.keys():
        print 'Validation phenotypes of disease 1 found.'
        y1 = df1['y'][...]  # Phenotype
        num_individs1 = len(y1)
        prs_D1 = sp.zeros(num_individs1)
        has_phenotypes1 = True

    has_phenotypes2 = False
    if 'y' in df2.keys():
        print 'Validation phenotypes of disease 2 found.'
        y2 = df2['y'][...]  # Phenotype
        num_individs2 = len(y2)
        prs_D2 = sp.zeros(num_individs2)
        has_phenotypes2 = True

    ld_scores_dict = ld_dict['ld_scores_dict']
    chrom_ld_dict = ld_dict['chrom_ld_dict']
    chrom_ref_ld_mats = ld_dict['chrom_ref_ld_mats']
    chrom_snps = ld_dict['chrom_snps']
    chrom_snpids = ld_dict['chrom_snpids']

    chrom_betas1 = ld_dict['chrom_betas1']
    chrom_betas2 = ld_dict['chrom_betas2']

    num_snps1 = 0
    sum_beta2s1 = 0
    num_snps2 = 0
    sum_beta2s2 = 0

    chr_list = list(set(cord_data_g1.keys()) & set(cord_data_g2.keys()))

    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            betas1 = chrom_betas1[chrom_str]
            n_snps1 = len(betas1)
            num_snps1 += n_snps1
            sum_beta2s1 += sp.sum(betas1**2)
            betas2 = chrom_betas2[chrom_str]
            n_snps2 = len(betas2)
            num_snps2 += n_snps2
            sum_beta2s2 += sp.sum(betas2**2)

    if user_h1 is None or user_h2 is None:
        L1 = ld_scores_dict['avg_gw_ld_score']
        chi_square_lambda1 = sp.mean(n1 * sum_beta2s1 / float(num_snps1))
        print 'Genome-wide lambda inflation of D1:', chi_square_lambda1
        print 'Genome-wide mean LD score of D1:', L1
        gw_h2_ld_score_est1 = max(0.0001, (max(1, chi_square_lambda1) - 1) /
                                  (n1 * (L1 / num_snps1)))
        print 'Estimated genome-wide heritability of D1:', gw_h2_ld_score_est1

        assert chi_square_lambda1 > 1, 'Something is wrong with the GWAS summary statistics of D1. Perhaps there were issues parsing them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small.'

        L2 = ld_scores_dict['avg_gw_ld_score']
        chi_square_lambda2 = sp.mean(n2 * sum_beta2s2 / float(num_snps2))
        print 'Genome-wide lambda inflation of D2:', chi_square_lambda2
        print 'Genome-wide mean LD score of D2:', L2
        gw_h2_ld_score_est2 = max(0.0001, (max(1, chi_square_lambda2) - 1) /
                                  (n2 * (L2 / num_snps2)))
        print 'Estimated genome-wide heritability of D2:', gw_h2_ld_score_est2

        assert chi_square_lambda2 > 1, 'Something is wrong with the GWAS summary statistics of D2. Perhaps there were issues parsing them, or the given GWAS sample size (N) was too small. Either way, lambda (the mean Chi-square statistic) is too small.'
    else:
        gw_h2_ld_score_est1 = user_h1
        gw_h2_ld_score_est2 = user_h2

    h2_new1 = sp.sum(h2_D1)
    sig_12_D1 = (1.0) / n1
    pr_sig1 = {}

    h2_new2 = sp.sum(h2_D2)
    sig_12_D2 = (1.0) / n2
    pr_sig2 = {}

    post_betas1 = {}
    post_betas2 = {}

    out1 = []
    out1.append('Estimated Genome-wide heritability: ' +
                str(gw_h2_ld_score_est1) + '\n')
    out1.append('Posterior variance for each snp: ' + str(sig_12_D1) + '\n')

    out2 = []
    out2.append('Estimated Genome-wide heritability: ' +
                str(gw_h2_ld_score_est2) + '\n')
    out2.append('Posterior variance for each snp: ' + str(sig_12_D2) + '\n')

    ## main calculation, chr by chr, posterior betas and prs ##

    beta1_current = chrom_betas1
    beta2_current = chrom_betas2

    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            print 'Preparing annotation-based priors for Chromosome %s' % (
                (chrom_str.split('_'))[1])

            pval_derived_betas1 = chrom_betas1[chrom_str]
            pval_derived_betas2 = chrom_betas2[chrom_str]
            sids = chrom_snpids[chrom_str]

            n_snps_chrom = len(sids)

            chri = int(chrom_str.split('_')[1])
            prf_sids_chri = prf_sids[prf_chr == chri]
            h2_D1_chri = h2_D1[prf_chr == chri]
            h2_D2_chri = h2_D2[prf_chr == chri]
            if len(prf_sids_chri) == len(sids):
                if sum(prf_sids_chri == sids) == len(prf_sids_chri):
                    pr_sig1[chrom_str] = sp.copy(h2_D1_chri)
                    pr_sig2[chrom_str] = sp.copy(h2_D2_chri)
                else:
                    print 'sorting prior files'
                    pr_sig1[chrom_str] = sp.zeros(len(sids))
                    pr_sig2[chrom_str] = sp.zeros(len(sids))
                    for i, sid in enumerate(sids):
                        pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri ==
                                                           sid]
                        pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri ==
                                                           sid]
            else:
                print 'extracting prior files'
                pr_sig1[chrom_str] = sp.zeros(len(sids))
                pr_sig2[chrom_str] = sp.zeros(len(sids))
                for i, sid in enumerate(sids):
                    pr_sig1[chrom_str][i] = h2_D1_chri[prf_sids_chri == sid]
                    pr_sig2[chrom_str][i] = h2_D2_chri[prf_sids_chri == sid]

            pr_sig1[
                chrom_str] = gw_h2_ld_score_est1 * pr_sig1[chrom_str] / h2_new1
            pr_sig2[
                chrom_str] = gw_h2_ld_score_est2 * pr_sig2[chrom_str] / h2_new2

    ########################### using AnnoPred-baseline as initial values ###############################
    init_betas_path = '%s.pickled.gz' % init_betas_prefix
    if not os.path.isfile(init_betas_path):
        print 'No initial values for mcmc found, generating ... '
        anno_post1 = {}
        anno_post2 = {}
        for chrom_str in chromosomes_list:
            if chrom_str in chr_list:
                pval_derived_betas1 = chrom_betas1[chrom_str]
                pval_derived_betas2 = chrom_betas2[chrom_str]
                annopred_betas1 = annopred_inf(
                    pval_derived_betas1,
                    pr_sigi=pr_sig1[chrom_str],
                    reference_ld_mats=chrom_ref_ld_mats[chrom_str],
                    n=n1,
                    ld_window_size=2 * ld_radius)
                annopred_betas2 = annopred_inf(
                    pval_derived_betas2,
                    pr_sigi=pr_sig2[chrom_str],
                    reference_ld_mats=chrom_ref_ld_mats[chrom_str],
                    n=n2,
                    ld_window_size=2 * ld_radius)
                anno_post1[chrom_str] = annopred_betas1
                anno_post2[chrom_str] = annopred_betas2
        init_betas = {'anno_post1': anno_post1, 'anno_post2': anno_post2}
        f = gzip.open(init_betas_path, 'wb')
        cPickle.dump(init_betas, f, protocol=2)
        f.close()
        print 'LD information is now pickled at %s' % init_betas_path
    else:
        print 'Loading initial values for mcmc from file: %s' % init_betas_path
        f = gzip.open(init_betas_path, 'r')
        init_betas = cPickle.load(f)
        f.close()
    #### initial values ####
    print 'Preparing initial values for MCMC'
    beta1_current = init_betas['anno_post1']
    beta2_current = init_betas['anno_post2']
    avg_betas1 = {}
    avg_betas2 = {}
    avg_PV = sp.zeros(4)
    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            avg_betas1[chrom_str] = sp.zeros(len(chrom_betas1[chrom_str]))
            avg_betas2[chrom_str] = sp.zeros(len(chrom_betas2[chrom_str]))

#    Pi = sp.random.dirichlet((alpha,alpha,alpha,alpha),1).flatten()
    print 'Initial PV: (' + str(Pi[0]) + ', ' + str(Pi[1]) + ', ' + str(
        Pi[2]) + ', ' + str(Pi[3]) + ')'
    sp.savetxt('%s_Initial_PV' % (out_file_prefix) + '.txt', Pi)
    pb = 0
    pbar = ProgressBar(widgets=[Percentage(), ' ',
                                Bar(), " ",
                                Timer()],
                       maxval=num_iter * 22).start()
    for k in range(num_iter):  #Big iteration
        A1 = 0
        A2 = 0
        A3 = 0
        A4 = 0
        for chrom_str in chromosomes_list:
            if chrom_str in chr_list:
                posterior_betas = post_betas.bi_mcmc_all_chr(
                    chrom_betas1[chrom_str],
                    chrom_betas2[chrom_str],
                    Pi=Pi,
                    pr_sig1=pr_sig1[chrom_str],
                    pr_sig2=pr_sig2[chrom_str],
                    start_betas1=beta1_current[chrom_str],
                    start_betas2=beta2_current[chrom_str],
                    h2_D1=gw_h2_ld_score_est1 *
                    (n_snps_chrom / float(num_snps1)),
                    n1=n1,
                    h2_D2=gw_h2_ld_score_est2 *
                    (n_snps_chrom / float(num_snps2)),
                    n2=n2,
                    ld_radius=ld_radius,
                    zj_p=zero_jump_prob,
                    ld_dict1=chrom_ld_dict[chrom_str],
                    ld_dict2=chrom_ld_dict[chrom_str])
                A1 += posterior_betas['A1']
                A2 += posterior_betas['A2']
                A3 += posterior_betas['A3']
                A4 += posterior_betas['A4']
                beta1_current[chrom_str] = posterior_betas['proposed_betas1']
                beta2_current[chrom_str] = posterior_betas['proposed_betas2']
                if k >= burn_in:
                    avg_betas1[chrom_str] += posterior_betas[
                        'curr_post_means1']  #Averaging over the posterior means instead of samples.
                    avg_betas2[chrom_str] += posterior_betas[
                        'curr_post_means2']
                pb = pb + 1
                pbar.update(pb)
        Pi = sp.random.dirichlet(
            (alpha[0] + A1, alpha[1] + A2, alpha[2] + A3, alpha[3] + A4),
            1).flatten()
        if k >= burn_in:
            avg_PV += Pi
    pbar.finish()

    ## prs and auc ##
    avg_PV = avg_PV / float(num_iter - burn_in)
    print 'Posterior PV: (' + str(avg_PV[0]) + ', ' + str(
        avg_PV[1]) + ', ' + str(avg_PV[2]) + ', ' + str(avg_PV[3]) + ')'
    sp.savetxt('%s_Avg_PV' % (out_file_prefix) + '.txt', avg_PV)

    for chrom_str in chromosomes_list:
        if chrom_str in chr_list:
            avg_betas1[chrom_str] = avg_betas1[chrom_str] / float(num_iter -
                                                                  burn_in)
            avg_betas2[chrom_str] = avg_betas2[chrom_str] / float(num_iter -
                                                                  burn_in)
            if has_phenotypes1:
                prs_chr_D1 = sp.dot(avg_betas1[chrom_str],
                                    chrom_snps[chrom_str])
                prs_D1 += prs_chr_D1
            if has_phenotypes2:
                prs_chr_D2 = sp.dot(avg_betas2[chrom_str],
                                    chrom_snps[chrom_str])
                prs_D2 += prs_chr_D2


############ PleioPred results #############
    corr_inf1 = sp.corrcoef(y1, prs_D1)[0, 1]
    r2_inf1 = corr_inf1**2
    #results_dict[p_str]['r2_pd']=r2_inf
    print 'D1: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % (
        r2_inf1, ((1 - r2_inf1)**2) / num_individs1)
    out1.append(
        'D1: the R2 prediction accuracy (observed scale) of PleioPred was: ' +
        str(r2_inf1) + ' (' + str(((1 - r2_inf1)**2) / num_individs1) + ')\n')

    if corr_inf1 < 0:
        prs_D1 = -1 * prs_D1
    auc1 = pred_accuracy(y1, prs_D1)
    print 'D1: PleioPred AUC for the whole genome was: %0.4f' % auc1
    out1.append('D1: PleioPred AUC for the whole genome was: ' + str(auc1) +
                '\n')
    out1.append('D1: PleioPred COR for the whole genome was: ' +
                str(corr_inf1) + '\n')

    sp.savetxt('%s_y_' % (out_file_prefix) + '_D1.txt', y1)
    sp.savetxt('%s_prs' % (out_file_prefix) + '_PleioPred_D1.txt', prs_D1)

    #Now calibration
    ff_inf = open('%s_auc_' % (out_file_prefix) + '_PleioPred_D1.txt', "w")
    ff_inf.writelines(out1)
    ff_inf.close()

    corr_inf2 = sp.corrcoef(y2, prs_D2)[0, 1]
    r2_inf2 = corr_inf2**2
    #results_dict[p_str]['r2_pd']=r2_inf
    print 'D2: the R2 prediction accuracy (observed scale) of PleioPred was: %0.4f (%0.6f)' % (
        r2_inf2, ((1 - r2_inf2)**2) / num_individs2)
    out2.append(
        'D2: the R2 prediction accuracy (observed scale) of PleioPred was: ' +
        str(r2_inf2) + ' (' + str(((1 - r2_inf2)**2) / num_individs2) + ')\n')

    if corr_inf2 < 0:
        prs_D2 = -1 * prs_D2
    auc2 = pred_accuracy(y2, prs_D2)
    print 'D2: PleioPred AUC for the whole genome was: %0.4f' % auc2
    out2.append('D2: PleioPred AUC for the whole genome was: ' + str(auc2) +
                '\n')
    out2.append('D2: PleioPred COR for the whole genome was: ' +
                str(corr_inf2) + '\n')

    sp.savetxt('%s_y_' % (out_file_prefix) + '_D2.txt', y2)
    sp.savetxt('%s_prs' % (out_file_prefix) + '_PleioPred_D2.txt', prs_D2)

    #Now calibration
    ff_inf = open('%s_auc_' % (out_file_prefix) + '_PleioPred_D2.txt', "w")
    ff_inf.writelines(out2)
    ff_inf.close()

    f = gzip.open('%s_betas' % (out_file_prefix) + '_PleioPred_D1.pickled.gz',
                  'wb')
    cPickle.dump(avg_betas1, f, protocol=2)
    f.close()

    f = gzip.open('%s_betas' % (out_file_prefix) + '_PleioPred_D2.pickled.gz',
                  'wb')
    cPickle.dump(avg_betas2, f, protocol=2)
    f.close()
Example #8
 def progressUpdate():
     pb = ProgressBar(maxval = 1, widgets = [Bar(), ' ', Percentage(), ' ', ETA()], fd = sys.stdout)
     while p.progress < 1:
         pb.update(p.progress)
         time.sleep(0.5)
     pb.finish()
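Example #8 polls a fractional p.progress attribute and feeds it to a bar with maxval=1. A hypothetical, self-contained driver for that pattern, with a made-up Worker class and the updater running in a background thread:

# Hypothetical driver for the polling pattern above; Worker and progress_update
# are assumptions, only the maxval=1 polling loop mirrors the original.
import sys
import time
import threading
from progressbar import ProgressBar, Bar, Percentage, ETA

class Worker(object):
    def __init__(self):
        self.progress = 0.0
    def run(self):
        for _ in range(100):
            time.sleep(0.02)  # stand-in for real work
            self.progress = min(1.0, self.progress + 0.01)

def progress_update(p):
    pb = ProgressBar(maxval=1, widgets=[Bar(), ' ', Percentage(), ' ', ETA()], fd=sys.stdout)
    pb.start()
    while p.progress < 1:
        pb.update(p.progress)
        time.sleep(0.5)
    pb.finish()

worker = Worker()
updater = threading.Thread(target=progress_update, args=(worker,))
updater.start()
worker.run()
updater.join()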
Example #9
def simulation(params, progressbar=True):
    ntry = params["ntry"]
    maxEpk = params["maxEpok"]
    normalize_data = params["normalize"]
    scale_data = params["scale"]
    crossvalidation_pct = params["cross_validation_percentage"]
    learningRate = params["algorithm"]["params"]["learning_rate"]
    moment = params["algorithm"]["params"]["momentum"]
    bet_simulation = params["bet_simulation"]

    # dataXAll, dataYAll, cotationAll = buildDataset()
    dataXAll, dataYAll, cotationAll = buildDataset(
        params["dataset"]["src"],
        params["dataset"]["features"],
        bet_simulation,
        mongolab=True)  ################ !
    dataXAll = [[float(x) for x in row] for row in dataXAll]
    dataYAll = [[float(x) for x in row] for row in dataYAll]
    nfeatures = len(dataXAll[0])

    #################################
    # game_issue = [1, 0, -1]
    # for i in range(0, len(dataXAll)):
    #     print ",".join(str(x) for x in [dataXAll[i] + cotationAll[i] + [game_issue[max(enumerate(dataYAll[i]), key=operator.itemgetter(1))[0]]]])
    # exit (-1)
    #################################

    # stat variable init
    winrate_history_train = list()
    winrate_history = list()
    money_post_crossval_history = list()
    money_during_crossval_history = list()
    odds_during_crossval_history = list()
    prediction_during_crossval_history = list()
    predict_interpret_during_crossval_history = list()
    expected_during_crossval_history = list()
    init_state = {
        "moneyBase": params["start_money"],
        "pct_bet": params["percentage_bet"],
        "simult_bet": params["simult_bet"]
    }
    # / stat variable init

    # Progress bar init
    if progressbar is True:
        widgets_pb = [Percentage(), ' ', Bar(), ' ', ETA()]
        pbar = ProgressBar(widgets=widgets_pb, maxval=ntry)
        pbar.start()
    # / Progress bar init

    for n in range(0, ntry):
        ds = SupervisedDataSet(nfeatures, 3)
        dataX = list(dataXAll)
        dataY = list(dataYAll)
        cotations = list(cotationAll)

        # crossvalidation data construction PICK LAST
        datapX = list()
        datapY = list()
        cotationpHDA = list()
        extracti = int(len(dataX) - (crossvalidation_pct * len(dataX)))
        datapX = dataX[extracti:len(dataX)]
        datapY = dataY[extracti:len(dataY)]
        cotationpHDA = cotations[extracti:len(cotations)]
        dataX = dataX[0:extracti]
        dataY = dataY[0:extracti]
        cotations = cotations[0:extracti]
        # / crossvalidation data construction

        # crossvalidation randomization
        if params["cross_validation_randomize"] is True:
            combined_crossval_data = zip(datapX, datapY, cotationpHDA)
            random.shuffle(combined_crossval_data)
            datapX, datapY, cotationpHDA = zip(*combined_crossval_data)
        # / crossvalidation randomization

        # scalarization && normalization -->
        # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html &&
        # http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.Normalizer.html
        scalizer = None
        normalizer = None
        if scale_data == True:
            scalizer = preprocessing.StandardScaler().fit(dataX)
            dataX = scalizer.transform(dataX)
        if normalize_data == True:
            normalizer = preprocessing.Normalizer().fit(dataX)
            dataX = normalizer.transform(dataX)
        # / scalarization && normalization

        # training dataset construction
        for i in range(0, len(dataX)):
            ds.addSample(dataX[i], dataY[i])
        # / training dataset construction

        # nn && trainer construction
        net = buildNetwork(ds.indim, (ds.indim + ds.outdim) / 2,
                           (ds.indim + ds.outdim) / 2,
                           ds.outdim,
                           bias=True,
                           outclass=SoftmaxLayer)  # building the n
        trainer = BackpropTrainer(net,
                                  ds,
                                  learningrate=learningRate,
                                  momentum=moment,
                                  verbose=False)  # building the trainer
        # / nn && trainer construction

        # training
        trainer.trainUntilConvergence(
            maxEpochs=maxEpk)  # Train, until convergence
        # for epoch in range(0,1000):
        #         trainer.train()
        # / training

        # crossvalidation on training data
        post_crossval_state = crossvalidation(net, init_state, dataX, dataY,
                                              cotations, scalizer, normalizer,
                                              False)
        # / crossvalidation on training data

        # post crossvalidation training data data register
        winrate = post_crossval_state["win"] / float(len(dataX))
        winrate_history_train.append(winrate)
        # / post crossvalidation training data data register

        # crossvalidation
        post_crossval_state = crossvalidation(net, init_state, datapX, datapY,
                                              cotationpHDA, scalizer,
                                              normalizer, bet_simulation)
        # / crossvalidation

        # post unit crossvalidation data register
        winrate = post_crossval_state["win"] / float(len(datapX))
        winrate_history.append(winrate)
        money_post_crossval_history.append(post_crossval_state["money"])
        money_during_crossval_history.append(
            post_crossval_state["money_during_crossval_history"])
        odds_during_crossval_history.append(
            post_crossval_state["odds_during_crossval_history"])
        prediction_during_crossval_history.append(
            post_crossval_state["prediction_during_crossval_history"])
        predict_interpret_during_crossval_history.append(
            post_crossval_state["predict_interpret_during_crossval_history"])
        expected_during_crossval_history.append(
            post_crossval_state["expected_during_crossval_history"])
        # / post unit crossvalidation data register

        if progressbar is True:
            pbar.update(n + 1)
    # scipy.describe instantiation
    winrate_history_describe_train = scipy.stats.describe(
        winrate_history_train)
    winrate_history_describe = scipy.stats.describe(winrate_history)
    money_post_crossval_history_describe = scipy.stats.describe(
        money_post_crossval_history)
    # / scipy.describe instantiation
    if progressbar is True:
        pbar.finish()
    results = {
        "win_percentage_training": {
            "median":
            numpy.median(numpy.array(winrate_history_train)),
            "standard_deviation":
            numpy.std(numpy.array(winrate_history_train)),
            "variance":
            numpy.var(numpy.array(winrate_history_train)),
            "mode":
            scipy.stats.mstats.mode(
                [round(w, 2) for w in winrate_history_train]),
            "describe": {
                "nobs": winrate_history_describe_train[0],
                "min": winrate_history_describe_train[1][0],
                "max": winrate_history_describe_train[1][1],
                "mean": winrate_history_describe_train[2],
                "variance": winrate_history_describe_train[3],
                "skewness": winrate_history_describe_train[4],
                "kurtosis": winrate_history_describe_train[5]
            },
            "normal_test":
            scipy.stats.normaltest(winrate_history_train),
            "histogram":
            scipy.stats.histogram(winrate_history_train),
            "lst":
            winrate_history_train
        },
        "win_percentage": {
            "median": numpy.median(numpy.array(winrate_history)),
            "standard_deviation": numpy.std(numpy.array(winrate_history)),
            "variance": numpy.var(numpy.array(winrate_history)),
            "mode":
            scipy.stats.mstats.mode([round(w, 2) for w in winrate_history]),
            "describe": {
                "nobs": winrate_history_describe[0],
                "min": winrate_history_describe[1][0],
                "max": winrate_history_describe[1][1],
                "mean": winrate_history_describe[2],
                "variance": winrate_history_describe[3],
                "skewness": winrate_history_describe[4],
                "kurtosis": winrate_history_describe[5]
            },
            "normal_test": scipy.stats.normaltest(winrate_history),
            "histogram": scipy.stats.histogram(winrate_history),
            "lst": winrate_history
        },
        "money_during_cross_validation": {
            "min":
            min([
                item for sublist in money_during_crossval_history
                for item in sublist
            ]),
            "max":
            max([
                item for sublist in money_during_crossval_history
                for item in sublist
            ]),
            "lst":
            money_during_crossval_history
        },
        "odds_during_crossval_history": {
            "lst": odds_during_crossval_history
        },
        "prediction_during_crossval_history": {
            "lst": prediction_during_crossval_history
        },
        "predict_interpret_during_crossval_history": {
            "lst": predict_interpret_during_crossval_history
        },
        "expected_during_crossval_history": {
            "lst": expected_during_crossval_history
        },
        "money_post_cross_validation": {
            "median":
            numpy.median(numpy.array(money_post_crossval_history)),
            "standard_deviation":
            numpy.std(numpy.array(money_post_crossval_history)),
            "variance":
            numpy.var(numpy.array(money_post_crossval_history)),
            "mode":
            scipy.stats.mstats.mode(
                [round(m, 1) for m in money_post_crossval_history]),
            "describe": {
                "nobs": money_post_crossval_history_describe[0],
                "min": money_post_crossval_history_describe[1][0],
                "max": money_post_crossval_history_describe[1][1],
                "mean": money_post_crossval_history_describe[2],
                "variance": money_post_crossval_history_describe[3],
                "skewness": money_post_crossval_history_describe[4],
                "kurtosis": money_post_crossval_history_describe[5]
            },
            "normal_test":
            scipy.stats.normaltest(money_post_crossval_history),
            "histogram":
            scipy.stats.histogram(money_post_crossval_history),
            "lst":
            money_post_crossval_history
        }
    }
    return results
Example #10
def train_model(sess, xtrain, ytrain, tf_seed, np_seed, xvalid=False, yvalid=False, n_epochs=False):
    """Train the MNF regression model; return (saver, model_dir, prediction op, input placeholder, last saved epoch)."""
    max_overfitting = setup['patience'] * setup['hyperparam_eval_interval']
    best_val_acc = float("inf")
    last_save = 0
    overfitting = 0

    N, dim = xtrain.shape
    iter_per_epoch = int(N / 100)
    input_shape = [None, dim]
    x = tf.placeholder(tf.float32, input_shape, name='x')
    y_ = tf.placeholder(tf.float32, [None, 1], name='y_')

    model = MNFMC(N, input_shape=input_shape, flows_q=FLAGS.fq, flows_r=FLAGS.fr, use_z=not FLAGS.no_z,
                  learn_p=FLAGS.learn_p, thres_var=FLAGS.thres_var, flow_dim_h=FLAGS.flow_h)

    tf.set_random_seed(tf_seed)
    np.random.seed(np_seed)
    y = model.predict(x)
    yd = model.predict(x, sample=False)
    pyx = y

    with tf.name_scope('KL_prior'):
        regs = model.get_reg()
        tf.summary.scalar('KL prior', regs)

    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.reduce_mean(tf.losses.mean_squared_error(y, y_, weights=0.5))
        tf.summary.scalar('Loglike', cross_entropy)

    global_step = tf.Variable(0, trainable=False)
    if FLAGS.anneal:
        number_zero, original_zero = FLAGS.epzero, FLAGS.epochs / 2
        with tf.name_scope('annealing_beta'):
            max_zero_step = number_zero * iter_per_epoch
            original_anneal = original_zero * iter_per_epoch
            beta_t_val = tf.cast((tf.cast(global_step, tf.float32) - max_zero_step) / original_anneal, tf.float32)
            beta_t = tf.maximum(beta_t_val, 0.)
            annealing = tf.minimum(1., tf.cond(global_step < max_zero_step, lambda: tf.zeros((1,))[0], lambda: beta_t))
            tf.summary.scalar('annealing beta', annealing)
    else:
        annealing = 1.

    with tf.name_scope('lower_bound'):
        lowerbound = cross_entropy + annealing * regs
        tf.summary.scalar('Lower bound', lowerbound)

    train_step = tf.train.AdamOptimizer(learning_rate=FLAGS.lr).minimize(lowerbound, global_step=global_step)

    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.square(yd - y_))
        tf.summary.scalar('Accuracy', accuracy)

    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', sess.graph)

    tf.add_to_collection('logits', y)
    tf.add_to_collection('logits_map', yd)
    tf.add_to_collection('accuracy', accuracy)
    tf.add_to_collection('x', x)
    tf.add_to_collection('y', y_)
    saver = tf.train.Saver(tf.global_variables())

    tf.global_variables_initializer().run()

    idx = np.arange(N)
    steps = 0
    model_dir = './models/mnf_lenet_mnist_fq{}_fr{}_usez{}_thres{}/model/'.format(FLAGS.fq, FLAGS.fr, not FLAGS.no_z,
                                                                                  FLAGS.thres_var)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    print('Will save model as: {}'.format(model_dir + 'model'))

    # Train
    if n_epochs:
        max_epochs = n_epochs
    else:
        max_epochs = FLAGS.epochs
    for epoch in range(max_epochs):
        widgets = ["epoch {}/{}|".format(epoch + 1, FLAGS.epochs), Percentage(), Bar(), ETA()]
        pbar = ProgressBar(iter_per_epoch, widgets=widgets)
        pbar.start()
        np.random.shuffle(idx)
        t0 = time.time()
        for j in range(iter_per_epoch):
            steps += 1
            pbar.update(j)
            batch = np.random.choice(idx, 100)
            if j == (iter_per_epoch - 1):
                summary, _ = sess.run([merged, train_step], feed_dict={x: xtrain[batch], y_: ytrain[batch]})
                train_writer.add_summary(summary, steps)
                train_writer.flush()
            else:
                sess.run(train_step, feed_dict={x: xtrain[batch], y_: ytrain[batch]})

        # If using validation data
        if not n_epochs:
            # the accuracy here is calculated by a crude MAP so as to have fast evaluation
            # it is much better if we properly integrate over the parameters by averaging across multiple samples
            tacc = sess.run(accuracy, feed_dict={x: xvalid, y_: yvalid})
            string = 'Epoch {}/{}, valid_acc: {:0.3f}'.format(epoch + 1, FLAGS.epochs, np.sqrt(tacc))
            string += ', dt: {:0.3f}'.format(time.time() - t0)
            print(string)
            sys.stdout.flush()

            if tacc < best_val_acc and epoch > last_save + setup['hyperparam_eval_interval']:
                print('saving best at epoch %d, rmse=%f' % (epoch, np.sqrt(tacc)))
                last_save = epoch
                best_val_acc = tacc
                string += ', model_save: True'
                saver.save(sess, model_dir + 'model')

            if tacc > best_val_acc:
                overfitting += 1
            else:
                overfitting = 0

            if overfitting > max_overfitting:
                break

    if n_epochs:
        last_save = n_epochs
        saver.save(sess, model_dir + 'model')

    return saver, model_dir, pyx, x, last_save
Example #11
def train():
    (xtrain, ytrain), (xvalid, yvalid), (_, _), y_std, y_mean = get_mc_data(FLAGS.dataset_name)
    min_tau = setup['tau_range'][FLAGS.dataset_name][0]
    max_tau = setup['tau_range'][FLAGS.dataset_name][1]

    # FIND BEST N_EPOCHS; TAU
    with tf.Graph().as_default() as g:
        with tf.Session() as sess:
            saver, model_dir, pyx, x, n_epochs = train_model(sess, xtrain, ytrain, FLAGS.seed, FLAGS.seed, xvalid, yvalid)

            # CRPS OPTIMIZE TO FIND STD DEV
            print "Finding optimal Tau"
            saver.restore(sess, model_dir + 'model')

            # Make optimization run take extra arguments
            optimize_fun = partial(run_tau_opt, sess, pyx, x, y_std, y_mean, xvalid, yvalid, False)
            tau_opt = gbrt_minimize(optimize_fun,
                                    [(min_tau, max_tau)],
                                    n_random_starts=100,
                                    n_calls=200
                                    )
            opt_tau = tau_opt.x[0]


            # CRPS OPTIMIZE TO FIND STD DEV
            print "Finding optimal CU Tau"
            # Make optimization run take extra arguments
            optimize_fun = partial(run_tau_opt, sess, pyx, x, y_std, y_mean, xvalid, yvalid, True)
            cutau_opt = gbrt_minimize(optimize_fun,
                                    [(min_tau, max_tau)],
                                    n_random_starts=100,
                                    n_calls=200
                                    )
            cu_opt_tau = cutau_opt.x[0]

            print "OPT TAU: {}. CRPS: {}".format(opt_tau, tau_opt.fun)
            print "CU OPT TAU: {}. CRPS: {}".format(cu_opt_tau, cutau_opt.fun)

    tf.reset_default_graph()

    # TRAIN AND EVALUATE FINAL MODEL 5 TIMES WITH DIFFERENT SEED:
    for final_seed in range(FLAGS.seed + 1, FLAGS.seed + 6):
        (xtrain, ytrain), (xtest, ytest), y_std, y_mean = get_mc_data(FLAGS.dataset_name, False)
        with tf.Graph().as_default() as g:
            with tf.Session() as sess:

                # Write csv file column headers if not yet written.
                plot_file_path = os.path.join(PLOT_RESULTS_PATH, FLAGS.dataset_name + '.txt')
                fid = open(plot_file_path, 'a')
                if sum(1 for line in open(plot_file_path)) == 0:
                    fid.write(',MNF const std dev,MNF std dev,y,yHat,run_count,dataset_split_seed\n')

                pll_file_path = os.path.join(RESULTS_PATH, FLAGS.dataset_name + '-pll.txt')
                fid_pll = open(pll_file_path, 'a')
                if sum(1 for line in open(pll_file_path)) == 0:
                    fid_pll.write('dataset_split,run_count,pll result,pll baseline,pll best,pll normalized\n')

                crps_file_path = os.path.join(RESULTS_PATH, FLAGS.dataset_name + '-crps.txt')
                fid_crps = open(crps_file_path, 'a')
                if sum(1 for line in open(crps_file_path)) == 0:
                    fid_crps.write('dataset_split,run_count,crps result,crps baseline,crps best,crps normalized\n')

                rmse_file_path = os.path.join(RESULTS_PATH, FLAGS.dataset_name + '-rmse.txt')
                fid_rmse = open(rmse_file_path, 'a')
                if sum(1 for line in open(rmse_file_path)) == 0:
                    fid_rmse.write('dataset_split,run_count,rmse\n')

                saver, model_dir, pyx, x, _ = train_model(sess, xtrain, ytrain, final_seed, final_seed, xvalid=False, yvalid=False, n_epochs=n_epochs)

                #EVALUATE TEST SET
                preds = np.zeros_like(ytest)
                all_preds = np.zeros([len(ytest), FLAGS.L])
                widgets = ["Sampling |", Percentage(), Bar(), ETA()]
                pbar = ProgressBar(FLAGS.L, widgets=widgets)
                pbar.start()
                for i in range(FLAGS.L):
                    pbar.update(i)
                    for j in range(int(xtest.shape[0] / 100)):
                        pyxi = sess.run(pyx, feed_dict={x: xtest[j * 100:(j + 1) * 100]})
                        preds[j * 100:(j + 1) * 100] += pyxi / FLAGS.L
                        all_preds[j * 100:(j + 1) * 100, i] = np.squeeze(pyxi * y_std + y_mean)
                    pyxi = sess.run(pyx, feed_dict={x: xtest[int(xtest.shape[0] / 100) * 100:]})
                    preds[int(xtest.shape[0] / 100) * 100:] += pyxi / FLAGS.L
                    all_preds[int(xtest.shape[0] / 100) * 100:, i] = np.squeeze(pyxi * y_std + y_mean)

                # FIND PLL AND CRPS
                samples = all_preds[:, :, newaxis].T.reshape(FLAGS.L, len(all_preds), 1)
                mean, var = np.mean(samples, axis=0), np.var(samples, axis=0) + opt_tau ** (-1)
                pll_res = pll(samples, ytest * y_std + y_mean, FLAGS.L, opt_tau)
                crps_res = crps(ytest * y_std + y_mean, mean, var)

                # FIND BASELINE PLL AND CRPS
                pll_baseline = pll(np.array([mean]), ytest * y_std + y_mean, 1, cu_opt_tau)
                crps_baseline = crps(ytest * y_std + y_mean, mean, cu_opt_tau**(-1))

                # FIND OPTIMAL PLL AND CRPS
                pll_best = pll_maximum(mean, ytest * y_std + y_mean)
                crps_best = crps_minimum(mean, ytest * y_std + y_mean)

                # GET NORMALIZED SCORES
                pll_norm = (pll_res - pll_baseline) / (pll_best - pll_baseline)
                crps_norm = (crps_res - crps_baseline) / (crps_best - crps_baseline)

                sample_accuracy = np.sqrt(np.mean((preds-ytest)*(preds-ytest)))
                print('Sample test accuracy: {}'.format(sample_accuracy))

                ytest_u = (ytest * y_std + y_mean)
                preds_u = (preds * y_std + y_mean)
                unnormalized_rmse = np.sqrt(np.mean((preds_u - ytest_u) * (preds_u - ytest_u)))
                print('Sample test accuracy (unnormalized): {}'.format(unnormalized_rmse))

                print('Test uncertainty quality metrics:')
                print "PLL: {}, PLL LOWER: {}, PLL UPPER: {}, NORM: {}".format(pll_res, pll_baseline, pll_best, pll_norm)
                print "CRPS: {}, CRPS LOWER: {}, CRPS UPPER: {}, NORM: {}".format(crps_res, crps_baseline, crps_best, crps_norm)

                all_preds_mean = all_preds.mean(axis=1)
                all_preds_std = all_preds.std(axis=1)

                # Write results to files
                for i in range(len(ytest)):
                    fid.write('%d,%f,%f,%f,%f,%d,%d\n' % (i, np.sqrt(cu_opt_tau**(-1)), all_preds_std[i], ytest_u[i], all_preds_mean[i], final_seed, FLAGS.dataset_split_seed))
                fid_rmse.write('%d,%d,%f\n' % (FLAGS.dataset_split_seed, final_seed, unnormalized_rmse))
                fid_pll.write('%d,%d,%f,%f,%f,%f\n' % (FLAGS.dataset_split_seed, final_seed, pll_res, pll_baseline, pll_best, pll_norm))
                fid_crps.write('%d,%d,%f,%f,%f,%f\n' % (FLAGS.dataset_split_seed, final_seed, crps_res, crps_baseline, crps_best, crps_norm))
                fid.close()
                fid_rmse.close()
                fid_pll.close()
                fid_crps.close()

        tf.reset_default_graph()
Example #12
def main():

    global args
    args = parser.parse_args()

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    task_name = args.task_name

    epoch_size = args.epoch_size
    batch_size = args.batch_size

    to_load = args.load_iter > 0
    load_iter = args.load_iter * to_load

    result_path = os.path.join(args.result_path, args.task_name)
    if args.style_A:
        result_path = os.path.join(result_path, args.style_A)
    result_path = os.path.join(result_path, args.model_arch + str(args.image_size))

    model_path = os.path.join(args.model_path, args.task_name)
    if args.style_A:
        model_path = os.path.join(model_path, args.style_A)
    model_path = os.path.join(model_path, args.model_arch + str(args.image_size))

    data_style_A, data_style_B, test_style_A, test_style_B = get_data(args)

    test_A = read_images(test_style_A, args.image_size)
    test_B = read_images(test_style_B, args.image_size)

    with torch.no_grad():
        test_A = Variable(torch.FloatTensor(test_A))
    with torch.no_grad():
        test_B = Variable(torch.FloatTensor(test_B))

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    if to_load:
        ix = str(args.load_iter)
        generator_A = torch.load(os.path.join(model_path, 'model_gen_A-' + ix))
        generator_B = torch.load(os.path.join(model_path, 'model_gen_B-' + ix))
        discriminator_A = torch.load(os.path.join(model_path, 'model_dis_A-' + ix))
        discriminator_B = torch.load(os.path.join(model_path, 'model_dis_B-' + ix))
    else:
        generator_A = Generator()
        generator_B = Generator()
        discriminator_A = Discriminator()
        discriminator_B = Discriminator()

    if cuda:
        test_A = test_A.cuda()
        test_B = test_B.cuda()
        generator_A = generator_A.cuda()
        generator_B = generator_B.cuda()
        discriminator_A = discriminator_A.cuda()
        discriminator_B = discriminator_B.cuda()

    data_size = min(len(data_style_A), len(data_style_B))
    n_batches = (data_size // batch_size)

    recon_criterion = nn.MSELoss()
    gan_criterion = nn.BCELoss()
    feat_criterion = nn.HingeEmbeddingLoss()

    gen_params = chain(generator_A.parameters(), generator_B.parameters())
    dis_params = chain(discriminator_A.parameters(), discriminator_B.parameters())

    optim_gen = optim.Adam(gen_params, lr=args.learning_rate, betas=(0.5, 0.999), weight_decay=0.00001)
    optim_dis = optim.Adam(dis_params, lr=args.learning_rate, betas=(0.5, 0.999), weight_decay=0.00001)

    iters = 0

    gen_loss_total = []
    dis_loss_total = []

    for epoch in range(epoch_size):
        data_style_A, data_style_B = shuffle_data(data_style_A, data_style_B)

        widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=n_batches, widgets=widgets)
        pbar.start()

        for i in range(n_batches):

            pbar.update(i)

            generator_A.zero_grad()
            generator_B.zero_grad()
            discriminator_A.zero_grad()
            discriminator_B.zero_grad()

            A_path = data_style_A[i * batch_size: (i+1) * batch_size]
            B_path = data_style_B[i * batch_size: (i+1) * batch_size]

            A = read_images(A_path, args.image_size)
            B = read_images(B_path, args.image_size)

            A = Variable(torch.FloatTensor(A))
            B = Variable(torch.FloatTensor(B))

            if cuda:
                A = A.cuda()
                B = B.cuda()

            AB = generator_B(A)
            BA = generator_A(B)

            ABA = generator_A(AB)
            BAB = generator_B(BA)

            # Reconstruction Loss
            recon_loss_A = recon_criterion(ABA, A)
            recon_loss_B = recon_criterion(BAB, B)

            # Real/Fake GAN Loss (A)
            A_dis_real, A_feats_real = discriminator_A(A)
            A_dis_fake, A_feats_fake = discriminator_A(BA)

            dis_loss_A, gen_loss_A = get_gan_loss(A_dis_real, A_dis_fake, gan_criterion, cuda)
            fm_loss_A = get_fm_loss(A_feats_real, A_feats_fake, feat_criterion)

            # Real/Fake GAN Loss (B)
            B_dis_real, B_feats_real = discriminator_B(B)
            B_dis_fake, B_feats_fake = discriminator_B(AB)

            dis_loss_B, gen_loss_B = get_gan_loss(B_dis_real, B_dis_fake, gan_criterion, cuda)
            fm_loss_B = get_fm_loss(B_feats_real, B_feats_fake, feat_criterion)

            # Total Loss

            if iters < args.gan_curriculum:
                rate = args.starting_rate
            else:
                rate = args.default_rate

            gen_loss_A_total = (gen_loss_B*0.1 + fm_loss_B*0.9) * (1.-rate) + recon_loss_A * rate
            gen_loss_B_total = (gen_loss_A*0.1 + fm_loss_A*0.9) * (1.-rate) + recon_loss_B * rate

            if args.model_arch == 'discogan':
                gen_loss = gen_loss_A_total + gen_loss_B_total
                dis_loss = dis_loss_A + dis_loss_B
            elif args.model_arch == 'recongan':
                gen_loss = gen_loss_A_total
                dis_loss = dis_loss_B
            elif args.model_arch == 'gan':
                gen_loss = (gen_loss_B*0.1 + fm_loss_B*0.9)
                dis_loss = dis_loss_B

            if iters % args.update_interval == 0:
                dis_loss.backward()
                optim_dis.step()
            else:
                gen_loss.backward()
                optim_gen.step()

            if iters % args.log_interval == 0:
                print("---------------------")
                print("GEN Loss:", as_np(gen_loss_A.mean()), as_np(gen_loss_B.mean()))
                print("Feature Matching Loss:", as_np(fm_loss_A.mean()), as_np(fm_loss_B.mean()))
                print("RECON Loss:", as_np(recon_loss_A.mean()), as_np(recon_loss_B.mean()))
                print("DIS Loss:", as_np(dis_loss_A.mean()), as_np(dis_loss_B.mean()))

            if iters % args.image_save_interval == 0:
                AB = generator_B(test_A)
                BA = generator_A(test_B)
                ABA = generator_A(AB)
                BAB = generator_B(BA)

                n_testset = min(test_A.size()[0], test_B.size()[0])

                subdir_path = os.path.join(result_path, str(iters // args.image_save_interval))

                if not os.path.exists(subdir_path):
                    os.makedirs(subdir_path)

                for im_idx in range(n_testset):
                    #A_val = test_A[im_idx].cpu().data.numpy().transpose(1,2,0) * 255.
                    #B_val = test_B[im_idx].cpu().data.numpy().transpose(1,2,0) * 255.
                    #BA_val = BA[im_idx].cpu().data.numpy().transpose(1,2,0)* 255.
                    #ABA_val = ABA[im_idx].cpu().data.numpy().transpose(1,2,0)* 255.
                    #AB_val = AB[im_idx].cpu().data.numpy().transpose(1,2,0)* 255.
                    #BAB_val = BAB[im_idx].cpu().data.numpy().transpose(1,2,0)* 255.

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '.A.jpg', img4save(test_A[im_idx]))
                    scipy.misc.imsave(filename_prefix + '.B.jpg', img4save(test_B[im_idx]))
                    scipy.misc.imsave(filename_prefix + '.BA.jpg', img4save(BA[im_idx]))
                    scipy.misc.imsave(filename_prefix + '.AB.jpg', img4save(AB[im_idx]))
                    scipy.misc.imsave(filename_prefix + '.ABA.jpg', img4save(ABA[im_idx]))
                    scipy.misc.imsave(filename_prefix + '.BAB.jpg', img4save(BAB[im_idx]))

            if iters % args.model_save_interval == 0:
                torch.save(generator_A, os.path.join(model_path, 'model_gen_A-' + str((iters / args.model_save_interval) + load_iter)))
                torch.save(generator_B, os.path.join(model_path, 'model_gen_B-' + str((iters / args.model_save_interval) + load_iter)))
                torch.save(discriminator_A, os.path.join(model_path, 'model_dis_A-' + str((iters / args.model_save_interval) + load_iter)))
                torch.save(discriminator_B, os.path.join(model_path, 'model_dis_B-' + str((iters / args.model_save_interval) + load_iter)))

            iters += 1
Exemple #13
0
 def __init__(self, maxval=0):
     widgets = [Percentage(), ' ', Bar(marker='=', left='[', right=']'), ' ', ETA()]
     super(ProgressBarContext, self).__init__(widgets=widgets, maxval=maxval, fd=sys.stdout)
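
A minimal usage sketch for this wrapper, assuming ProgressBarContext subclasses progressbar.ProgressBar as the super() call suggests (the workload below is purely illustrative):

    items = ['alpha', 'beta', 'gamma']          # illustrative workload
    pbar = ProgressBarContext(maxval=len(items))
    pbar.start()
    for i, item in enumerate(items, 1):
        len(item)                               # stand-in for the real work
        pbar.update(i)
    pbar.finish()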
Exemple #14
0
    def do_distribute(self, args):
        """distribute:\n  copy local file to a group of servers into a specified directory"""
        args = args.split()
        if len(args) < 2:
            error(
                "Usage: distribute <conductor_expression> <local_filename> [remote_dir=%s]"
                % self.default_remote_dir)
            return
        expr, filename = args[:2]
        try:
            hosts = self.conductor.resolve(expr)
        except ParseException as e:
            error("Invalid conductor expression: %s" % str(e))
            return

        if len(hosts) == 0:
            error("Empty hostlist")
            return
        if not os.path.isfile(filename):
            error("%s is not a file or doesn't exist" % filename)
            return

        if len(args) > 2:
            remote_dir = args[2]
        else:
            remote_dir = self.default_remote_dir

        results = {"error": [], "success": [], "total": 0}
        errors = defaultdict(list)

        progress = None
        if self.progressbar:
            from progressbar import ProgressBar, Percentage, Bar, ETA, FileTransferSpeed
            progress = ProgressBar(widgets=[
                "Running: ",
                Percentage(), ' ',
                Bar(marker='.'), ' ',
                ETA(), ' ',
                FileTransferSpeed()
            ],
                                   maxval=len(hosts))

        def worker(host):
            p = Popen(
                [
                    "scp",
                    "-B",  # prevents asking for passwords
                    filename,
                    "%s@%s:%s" % (self.user, host, remote_dir)
                ],
                stdout=PIPE,
                stderr=PIPE)
            o, e = p.communicate()
            if p.poll() == 0:
                results["success"].append(host)
            else:
                results["error"].append(host)
                errors[e].append(host)

            results["total"] += 1
            if self.progressbar:
                progress.update(results["total"])

        if self.progressbar:
            progress.start()

        pool = Pool()
        for host in hosts:
            pool.start(Greenlet(worker, host))
        pool.join()

        if self.progressbar:
            progress.finish()

        if len(results["success"]) > 0:
            msg = "Successfully distributed to %d hosts" % len(
                results["success"])
            cprint(msg, "green")
        if len(results["error"]) > 0:
            cprint("There were errors distributing file", "red")
            for output, hosts in errors.items():
                msg = " %s    " % ','.join(hosts)
                table_width = min([len(msg) + 2, terminal_size()[0]])
                cprint("=" * table_width, "blue", attrs=["bold"])
                cprint(msg, "blue", attrs=["bold"])
                cprint("=" * table_width, "blue", attrs=["bold"])
                print(output)
Exemple #15
0
    def populate(self, target_model_name, target_database, source_model_name,
                 source_database, size, limit, offset, inchi_conversion, idx,
                 display_offset):

        from django.db import connections
        from django.db import transaction
        from django.db.models import get_model
        from django.db.utils import DatabaseError, IntegrityError

        source_model = get_model(self.app_name, source_model_name)
        target_model = get_model(self.app_name, target_model_name)

        target_conn = connections[target_database]

        source_pk = source_model._meta.pk.name

        writer = Writer(self.term, (idx, 0))
        pbar = ProgressBar(widgets=[
            '{0} ({1}) [{2}-{3}]: '.format(target_model_name,
                                           idx - display_offset + 1, offset,
                                           offset + limit),
            Percentage(), ' (',
            Counter(), ') ',
            Bar(marker=RotatingMarker()), ' ',
            ETA()
        ],
                           fd=writer,
                           maxval=limit).start()

        inchi_kwargs = {}

        if inchi_conversion == 'indigo':
            indigo_obj = indigo.Indigo()
            indigo_inchi_obj = indigo_inchi.IndigoInchi(indigo_obj)
            inchi_kwargs = {"inchiObj": indigo_inchi_obj}

        elif inchi_conversion == 'rdkit' and self.verbosity < 1:
            from rdkit import rdBase
            rdBase.DisableLog('rdApp.error')
            from rdkit import RDLogger
            lg = RDLogger.logger()
            lg.setLevel(RDLogger.CRITICAL)

        inchi_converter = inchi_converters.get(inchi_conversion)
        last_pk = None

        for i in range(offset, offset + limit, size):
            success = 0
            failure = 0
            empty = 0
            ignored = 0
            transaction.commit_unless_managed(using=target_database)
            transaction.enter_transaction_management(using=target_database)
            transaction.managed(True, using=target_database)
            with target_conn.constraint_checks_disabled():

                try:

                    chunk_size = min(size, limit + offset - i)
                    original_data = None
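
                    # Keyset pagination: the first pass establishes last_pk (via an offset
                    # lookup when resuming), and every later chunk filters on pk__gt=last_pk
                    # so the database avoids scanning large OFFSETs.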

                    if not last_pk:
                        if i:
                            last_pk = source_model.objects.using(
                                source_database).order_by(source_pk).only(
                                    source_pk).values_list(source_pk)[i][0]
                        else:
                            original_data = source_model.objects.using(
                                source_database).order_by(
                                    source_pk).values_list(
                                        'pk', 'standardinchi')[:chunk_size]

                    if not original_data:
                        original_data = source_model.objects.using(
                            source_database).order_by(source_pk).values_list(
                                'pk', 'standardinchi').filter(
                                    pk__gt=last_pk)[:chunk_size]

                    last_pk = original_data[chunk_size - 1][0]

                    target_data = []
                    for pk, inchi in original_data:
                        if not inchi:
                            empty += 1
                            continue

                        if int(pk) in self.ignores:
                            ignored += 1
                            continue

                        ctab = self.convert_inchi(inchi_converter, pk, inchi,
                                                  inchi_kwargs)
                        if not ctab:
                            failure += 1
                            continue
                        target_data.append(
                            target_model(pk=int(pk), molfile=ctab))
                        success += 1

                    target_model.objects.using(target_database).bulk_create(
                        target_data)

                except IntegrityError as e:
                    if self.verbosity >= 1:
                        self.stderr.write(
                            "ERROR: integrity error ({0}) occurred when processing chunk {1}-{2}"
                            .format(e.message, i, i + size))
                    transaction.rollback(using=target_database)
                    transaction.leave_transaction_management(
                        using=target_database)
                    continue

                except DatabaseError as e:
                    if self.verbosity >= 1:
                        self.stderr.write(
                            "ERROR: database error ({0}) occurred when processing chunk {1}-{2}"
                            .format(e.message, i, i + size))
                    transaction.rollback(using=target_database)
                    transaction.leave_transaction_management(
                        using=target_database)
                    raise e

            pbar.update(i - offset + 1)
            transaction.commit(using=target_database)
            transaction.leave_transaction_management(using=target_database)

            self.total_success += success
            self.total_failure += failure
            self.total_empty += empty
            self.total_ignored += ignored

        pbar.update(limit)
        pbar.finish()
Exemple #16
0
def fuzz_file(num_iterations, file_path, mcalls, validator=None):
    '''
        Call an external fuzzer (hardcoded to radamsa, for now) to fuzz/mutate
        the input file (file_path) num_iterations times. Each iteration makes a
        multicall on the different engines.
    '''
    bar = ProgressBar(widgets=[
        'fuzzing ' + file_path + ' ',
        Percentage(), ' ',
        Bar(marker=RotatingMarker()), ' ',
        ETA(), ' '
    ], maxval=num_iterations).start()
    #pylint: disable=W0612
    num_it = 1
    while num_it <= num_iterations:

        # fuzz the file with radamsa
        fuzzed_file_path = os.path.join(tempfile.gettempdir(),
                                        'temp_filefuzzed')
        cmd = "radamsa --output {} {}".format(fuzzed_file_path, file_path)
        args = shlex.split(cmd)
        try:
            p = subprocess.Popen(args,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            p.communicate()
        except FileNotFoundError as error:
            if 'radamsa' in str(error):
                raise Exception(
                    'Please check if radamsa is installed on your environment (see README.md file).'
                )
        except Exception as error:
            raise Exception('Error:', error)

        # check if file is valid
        if validator is not None:
            validation_error = validator(fuzzed_file_path)
            if validation_error:
                res = multicall.Results(fuzzed_file_path, validation_error)
                mcalls.notify(res)
                continue  # skip this file

        # check discrepancy
        try:
            res = multicall.callAll(fuzzed_file_path)
            res.path_name = os.path.join(
                constants.logs_dir, 'fuzzed_' + ntpath.basename(file_path))
            if mcalls.notify(
                    res
            ):  # true if it is interesting and distinct. in this case, save the file
                ## get first name of file...
                shutil.copy(fuzzed_file_path, res.path_name)
        except UnicodeDecodeError as exc:
            # TODO: It is silly but we can't handle properly non-unicode outputs
            # just because the .decode('utf-8') to convert bytes into strings
            # raises this exception when the non-unicode char is mapped.
            continue

        bar.update(num_it)
        num_it += 1
    bar.finish()
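
A hypothetical invocation, assuming radamsa is on PATH and an mcalls object from the project's multicall module has already been constructed; my_validator is an illustrative name:

    def my_validator(path):
        # return a non-empty error description to reject a mutated file, or None to accept it
        return None

    fuzz_file(100, 'samples/input.js', mcalls, validator=my_validator)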
Exemple #17
0
import multiprocessing
import mmap
from atpbar import atpbar
from progressbar import ProgressBar, Percentage, Bar, ETA, AdaptiveETA
from concurrent.futures import ThreadPoolExecutor
import numpy as np

from randmac import RandMac
from scapy.layers.inet import IP, TCP, UDP, ICMP
from scapy.layers.l2 import Ether
from scapy.packet import Raw
from scapy.utils import wrpcap
from scapy.volatile import RandIP, RandString
from scapy.all import *

widgets = [Percentage(), ' ', Bar(), ' ', ETA(), ' ', AdaptiveETA()]

pbar_update_value = 0
total_tasks = 0


class RawPcapReaderFD(RawPcapReader):
    """A stateful pcap reader. Each packet is returned as a string"""
    def __init__(self, fd):
        self.filename = "dummy"
        self.f = fd
        magic = self.f.read(4)
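
A sketch of how such a reader might be constructed, assuming the rest of the class mirrors scapy's RawPcapReader and only the source of the file descriptor differs:

    with open('capture.pcap', 'rb') as fd:
        reader = RawPcapReaderFD(fd)
        # packets would then be consumed through the inherited RawPcapReader interface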
Exemple #18
0
def device_29c0x0_read(ser, max_num_of_bytes_read, color):
    widgets = ['Reading EEPROM: ', Percentage(), ' ', Bar(marker=RotatingMarker()), ' ', ETA(), ' ',
               FileTransferSpeed()]

    # Initiate READ sequence.
    ser.write('READ;')
    time.sleep(0.1)

    while ser.inWaiting() == 0:
        pass

    in_buffer = ''

    while ser.inWaiting() > 0:
        in_buffer += ser.read(1)
    print('\nStart reading sequence...')

    if in_buffer == 'START':
        print('OK.')
    else:
        print(color.TEXT_RED + 'Start reading sequence - error.' + color.TEXT_GREEN)
        print(in_buffer + '\n')

    max_num_of_bytes_read_str = '%s;' % max_num_of_bytes_read

    # Initiate a number of bytes to be read (max address) - (for example: 5931;).
    ser.write(max_num_of_bytes_read_str)
    time.sleep(0.1)

    while ser.inWaiting() == 0:
        pass
    in_buffer = ''

    while ser.inWaiting() > 0:
        in_buffer += ser.read(1)
    print('\nThe number of bytes to be read accepted...')

    if in_buffer == 'DONE':
        print('OK.')
    else:
        print(color.TEXT_RED + 'Bytes-to-be-read receiving - error.' + color.TEXT_GREEN)
        print(in_buffer + '\n')

    # in_buffer = ''
    read_data_string = ''
    num_of_bytes_read = 0

    print(color.TEXT_CYAN + color.TEXT_BRIGHT)
    # maxval=max_num_of_bytes_read tested! :)
    pbar = ProgressBar(widgets=widgets, maxval=max_num_of_bytes_read).start()

    # Start reading data from EEPROM.
    while num_of_bytes_read < max_num_of_bytes_read:
        while ser.inWaiting() > 0:
            read_data_string += ser.read(1)
            # time.sleep(0.01)
            num_of_bytes_read += 1
            pbar.update(num_of_bytes_read)
    pbar.finish()

    print('%s' % num_of_bytes_read)
    print(color.TEXT_GREEN)

    memory_chksum = twos_complement_chksum(read_data_string)

    return memory_chksum
Exemple #19
0
def classify(model_dir,
             n_inference_steps=20, n_inference_samples=20,
             dim_hs=[100], h_act='T.nnet.softplus',
             learning_rate=0.0001, learning_rate_schedule=None,
             dropout=0.1, batch_size=100, l2_decay=0.002,
             epochs=100,
             optimizer='rmsprop', optimizer_args=dict(),
             center_input=True, name='classifier'):
    out_path = model_dir

    inference_args = dict(
        inference_method='adaptive',
        inference_rate=0.1,
    )

    # ========================================================================
    print 'Loading model'

    model_file = glob(path.join(model_dir, '*best*npz'))[0]

    models, model_args = load_model(model_file, unpack_sbn, **inference_args)

    model = models['sbn']
    model.set_tparams()

    dataset = model_args['dataset']
    dataset_args = model_args['dataset_args']
    if dataset == 'mnist':
        dataset_args['binarize'] = True
        dataset_args['source'] = '/export/mialab/users/dhjelm/data/mnist.pkl.gz'

    train, valid, test = load_data(dataset, batch_size, batch_size, batch_size,
                                   **dataset_args)

    mlp_args = dict(
        dim_hs=dim_hs,
        h_act=h_act,
        dropout=dropout,
        out_act=train.acts['label']
    )

    X = T.matrix('x', dtype=floatX)
    Y = T.matrix('y', dtype=floatX)
    trng = RandomStreams(random.randint(0, 1000000))

    if center_input:
        print 'Centering input with train dataset mean image'
        X_mean = theano.shared(train.mean_image.astype(floatX), name='X_mean')
        X_i = X - X_mean
    else:
        X_i = X

    # ========================================================================
    print 'Loading MLP and forming graph'

    (qs, i_costs), _, updates = model.infer_q(
            X_i, X, n_inference_steps, n_inference_samples=n_inference_samples)

    q0 = qs[0]
    qk = qs[-1]

    constants = [q0, qk]
    dim_in = model.dim_h
    dim_out = train.dims['label']

    mlp0_args = deepcopy(mlp_args)
    mlp0 = MLP(dim_in, dim_out, name='classifier_0', **mlp0_args)
    mlpk_args = deepcopy(mlp_args)
    mlpk = MLP(dim_in, dim_out, name='classifier_k', **mlpk_args)
    mlpx_args = deepcopy(mlp_args)
    mlpx = MLP(train.dims[str(dataset)], dim_out, name='classifier_x', **mlpx_args)
    tparams = mlp0.set_tparams()
    tparams.update(**mlpk.set_tparams())
    tparams.update(**mlpx.set_tparams())

    print_profile(tparams)

    p0 = mlp0(q0)
    pk = mlpk(qk)
    px = mlpx(X_i)

    # ========================================================================
    print 'Getting cost'

    cost0 = mlp0.neg_log_prob(Y, p0).sum(axis=0)
    costk = mlpk.neg_log_prob(Y, pk).sum(axis=0)
    costx = mlpx.neg_log_prob(Y, px).sum(axis=0)

    cost = cost0 + costk + costx
    extra_outs = []
    extra_outs_names = ['cost']

    if l2_decay > 0.:
        print 'Adding %.5f L2 weight decay' % l2_decay
        mlp0_l2_cost = mlp0.get_L2_weight_cost(l2_decay)
        mlpk_l2_cost = mlpk.get_L2_weight_cost(l2_decay)
        mlpx_l2_cost = mlpx.get_L2_weight_cost(l2_decay)
        cost += mlp0_l2_cost + mlpk_l2_cost + mlpx_l2_cost
        extra_outs += [mlp0_l2_cost, mlpk_l2_cost, mlpx_l2_cost]
        extra_outs_names += ['MLP0 L2 cost', 'MLPk L2 cost', 'MLPx L2 cost']

    # ========================================================================
    print 'Extra functions'
    error0 = (Y * (1 - p0)).sum(1).mean()
    errork = (Y * (1 - pk)).sum(1).mean()
    errorx = (Y * (1 - px)).sum(1).mean()
    
    f_test_keys = ['Error 0', 'Error k', 'Error x', 'Cost 0', 'Cost k', 'Cost x']
    f_test = theano.function([X, Y], [error0, errork, errorx, cost0, costk, costx])
    
    # ========================================================================
    print 'Setting final tparams and save function'

    all_params = OrderedDict((k, v) for k, v in tparams.iteritems())

    tparams = OrderedDict((k, v)
        for k, v in tparams.iteritems()
        if (v not in updates.keys() or v not in excludes))

    print 'Learned model params: %s' % tparams.keys()
    print 'Saved params: %s' % all_params.keys()

    def save(tparams, outfile):
        d = dict((k, v.get_value()) for k, v in all_params.items())

        d.update(
            dim_in=dim_in,
            dim_out=dim_out,
            dataset=dataset, dataset_args=dataset_args,
            **mlp_args
        )
        np.savez(outfile, **d)

    # ========================================================================
    print 'Getting gradients.'
    grads = T.grad(cost, wrt=itemlist(tparams),
                   consider_constant=constants)

    # ========================================================================
    print 'Building optimizer'
    lr = T.scalar(name='lr')
    f_grad_shared, f_grad_updates = eval('op.' + optimizer)(
        lr, tparams, grads, [X, Y], cost, extra_ups=updates,
        extra_outs=extra_outs, **optimizer_args)

    monitor = SimpleMonitor()

    try:
        epoch_t0 = time.time()
        s = 0
        e = 0

        widgets = ['Epoch {epoch} ({name}, '.format(epoch=e, name=name),
                   Timer(), '): ', Bar()]
        epoch_pbar = ProgressBar(widgets=widgets, maxval=train.n).start()
        training_time = 0

        while True:
            try:
                x, y = train.next()
                
                if train.pos == -1:
                    epoch_pbar.update(train.n)
                else:
                    epoch_pbar.update(train.pos)

            except StopIteration:
                print
                epoch_t1 = time.time()
                training_time += (epoch_t1 - epoch_t0)
                valid.reset()

                widgets = ['Validating: ',
                          Percentage(), ' (', Timer(), ')']
                pbar    = ProgressBar(widgets=widgets, maxval=valid.n).start()
                results_train = OrderedDict()
                results_valid = OrderedDict()
                while True:
                    try:
                        x_valid, y_valid = valid.next()
                        x_train, y_train = train.next()

                        r_train = f_test(x_train, y_train)
                        r_valid = f_test(x_valid, y_valid)
                        results_i_train = dict((k, v) for k, v in zip(f_test_keys, r_train))
                        results_i_valid = dict((k, v) for k, v in zip(f_test_keys, r_valid))
                        update_dict_of_lists(results_train, **results_i_train)
                        update_dict_of_lists(results_valid, **results_i_valid)

                        if valid.pos == -1:
                            pbar.update(valid.n)
                        else:
                            pbar.update(valid.pos)

                    except StopIteration:
                        print
                        break

                def summarize(d):
                    for k, v in d.iteritems():
                        d[k] = np.mean(v)

                summarize(results_train)
                summarize(results_valid)

                monitor.update(**results_train)
                monitor.update(dt_epoch=(epoch_t1-epoch_t0),
                               training_time=training_time)
                monitor.update_valid(**results_valid)
                monitor.display()

                monitor.save(path.join(
                    out_path, '{name}_monitor.png').format(name=name))
                monitor.save_stats(path.join(
                    out_path, '{name}_monitor.npz').format(name=name))
                monitor.save_stats_valid(path.join(
                    out_path, '{name}_monitor_valid.npz').format(name=name))

                e += 1
                epoch_t0 = time.time()

                valid.reset()
                train.reset()

                if learning_rate_schedule is not None:
                    if e in learning_rate_schedule.keys():
                        lr = learning_rate_schedule[e]
                        print 'Changing learning rate to %.5f' % lr
                        learning_rate = lr

                widgets = ['Epoch {epoch} ({name}, '.format(epoch=e, name=name),
                           Timer(), '): ', Bar()]
                epoch_pbar = ProgressBar(widgets=widgets, maxval=train.n).start()

                continue

            if e > epochs:
                break

            rval = f_grad_shared(x, y)

            if check_bad_nums(rval, extra_outs_names):
                print rval
                print np.any(np.isnan(mlpk.W0.get_value()))
                print np.any(np.isnan(mlpk.b0.get_value()))
                print np.any(np.isnan(mlpk.W1.get_value()))
                print np.any(np.isnan(mlpk.b1.get_value()))
                raise ValueError('Bad number!')

            f_grad_updates(learning_rate)
            s += 1

    except KeyboardInterrupt:
        print 'Training interrupted'

    test.reset()

    widgets = ['Testing: ',
               Percentage(), ' (', Timer(), ')']
    pbar    = ProgressBar(widgets=widgets, maxval=test.n).start()
    results_test = OrderedDict()
    while True:
        try:
            x_test, y_test = test.next()
            r_test = f_test(x_test, y_test)
            results_i_test = dict((k, v) for k, v in zip(f_test_keys, r_test))
            update_dict_of_lists(results_test, **results_i_test)
            if test.pos == -1:
                pbar.update(test.n)
            else:
                pbar.update(test.pos)

        except StopIteration:
            print
            break

    def summarize(d):
        for k, v in d.iteritems():
            d[k] = np.mean(v)

    summarize(results_test)
    print 'Test results:'
    monitor.simple_display(results_test)

    if out_path is not None:
        outfile = path.join(out_path, '{name}_{t}.npz'.format(name=name, t=int(time.time())))
        last_outfile = path.join(out_path, '{name}_last.npz'.format(name=name))

        print 'Saving'
        save(tparams, outfile)
        save(tparams, last_outfile)
        print 'Done saving.'

    print 'Bye bye!'
Exemple #20
0
def build_progressbar(name, **kwargs):
    """Return configured :class:`ProgressBar` instance"""
    from progressbar import Counter, ProgressBar, Timer, Percentage, ETA
    widgets = [name, Percentage(), ' ', ETA(), ' ',
            Counter(), ' results(', Timer(), ')']
    return ProgressBar(widgets=widgets, **kwargs)
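
A brief usage sketch; maxval is forwarded through **kwargs to ProgressBar:

    pbar = build_progressbar('scanning ', maxval=500)
    pbar.start()
    for i in range(500):
        pbar.update(i + 1)
    pbar.finish()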
Exemple #21
0
    def fit(self, X, y, Xtest=None, ytest=None):
        """Fit."""
        input_dim = X.shape[1]
        # set different data preparation schemes based on what kind of NN it is
        layers = [list(i.keys())[0] for i in self.architecture]
        self.isCNN = 'Conv' in layers
        self.isRecurrent = 'GRU' in layers or 'LSTM' in layers
        if self.isCNN:
            self.addDelay = delay_preds
            self.training_params['num_strides'] = self.delay // self.skip
        elif self.isRecurrent:
            self.addDelay = delay_preds_2d
        else:
            input_dim *= self.delay / self.skip
            input_dim = int(input_dim)
            self.addDelay = delay_preds

        # create the model
        self.model = buildNN(self.architecture, self.training_params,
                             input_dim)
        # print self.model.get_config()

        widgets = [
            'Training : ',
            Percentage(), ' ',
            Bar(marker=RotatingMarker()), ' ',
            ETA(), ' '
        ]
        pbar = ProgressBar(widgets=widgets, maxval=self.majorEpochs)
        pbar.start()

        # train the model on a portion of training data; that portion is changed each majorEpoch
        for majorEpoch in range(self.majorEpochs):
            startingPoint = majorEpoch % self.partsTrain or self.mdlNr % self.partsTrain
            if self.jump is not None:
                trainData = self.addDelay(X,
                                          delay=self.delay,
                                          skip=self.skip,
                                          subsample=self.partsTrain,
                                          start=startingPoint,
                                          jump=self.jump)
            else:
                trainData = self.addDelay(X,
                                          delay=self.delay,
                                          skip=self.skip,
                                          subsample=self.partsTrain,
                                          start=startingPoint)

            if self.isCNN:
                trainData = trainData.reshape(
                    (trainData.shape[0], 1, trainData.shape[1], 1))
            targets = y[startingPoint::self.partsTrain]

            trainData = trainData[::self.subsample]
            targets = targets[::self.subsample]

            # print 'trainData:', trainData.shape
            # print 'targets:', targets.shape
            # print theano.printing.debugprint(self.model)
            self.model.fit(trainData,
                           targets,
                           epochs=self.smallEpochs,
                           batch_size=512,
                           verbose=0)

            trainData = None

            pbar.update(majorEpoch)

            if self.verbose and majorEpoch % self.checkEveryEpochs == 0:
                print("Total epochs: %d" % (self.smallEpochs *
                                            (majorEpoch + 1)))
                if Xtest is not None and ytest is not None:
                    pred = self._predict_proba_train(Xtest)
                    score = np.mean(
                        roc_auc_score(ytest[0::self.partsTest], pred))
                    print("Test AUC : %.5f" % (score))
                    pred = None

        pbar.finish()

        if self.verbose:
            print('Training finished after %d epochs' % (self.smallEpochs *
                                                         (majorEpoch + 1)))
Exemple #22
0
 def start(self, name, count):
     self.pbar = ProgressBar(widgets=[name, Percentage(), Bar()],
                             maxval=count)
     self.pbar.start()
Exemple #23
0
    def migrate2(self):
        session = Session()

        try:
            from progressbar import ProgressBar, Percentage, Bar, ETA
        except ImportError:
            print 'Critical: progressbar library not found, try running `bin/easy_install progressbar` ?'
            return

        class Seen(Base):

            __tablename__ = 'seen'

            id = Column(Integer, primary_key=True)
            field = Column(String)
            value = Column(String, index=True)
            task = Column('feed', String)
            added = Column(DateTime)

            def __init__(self, field, value, task):
                self.field = field
                self.value = value
                self.task = task
                self.added = datetime.now()

            def __str__(self):
                return '<Seen(%s=%s)>' % (self.field, self.value)

        print ''

        # REPAIR / REMOVE DUPLICATES
        index = 0
        removed = 0
        total = session.query(Seen).count() + 1

        widgets = [
            'Repairing - ',
            ETA(), ' ',
            Percentage(), ' ',
            Bar(left='[', right=']')
        ]
        bar = ProgressBar(widgets=widgets, maxval=total).start()

        for seen in session.query(Seen).all():
            index += 1
            if index % 10 == 0:
                bar.update(index)
            amount = 0
            for dupe in session.query(Seen).filter(Seen.value == seen.value):
                amount += 1
                if amount > 1:
                    removed += 1
                    session.delete(dupe)
        bar.finish()

        # MIGRATE
        total = session.query(Seen).count() + 1
        widgets = [
            'Upgrading - ',
            ETA(), ' ',
            Percentage(), ' ',
            Bar(left='[', right=']')
        ]
        bar = ProgressBar(widgets=widgets, maxval=total).start()

        index = 0
        for seen in session.query(Seen).all():
            index += 1
            if not index % 10:
                bar.update(index)
            se = SeenEntry(u'N/A', seen.task, u'migrated')
            se.added = seen.added
            se.fields.append(SeenField(seen.field, seen.value))
            session.add(se)
        bar.finish()

        session.execute('drop table seen;')
        session.commit()
Exemple #24
0
    def enumSMB(self):
        progBar = ProgressBar(
            widgets=['SMBConnection test: ',
                     Percentage(),
                     Bar(), ETA()],
            maxval=len(self.smbShareCandidates)).start()
        prog = 0
        try:
            for dnsname in self.smbShareCandidates:
                try:
                    # Changing default timeout as shares should respond within 5 seconds if there is a share
                    # and ACLs make it available to self.user with self.passwd
                    smbconn = smbconnection.SMBConnection('\\\\' +
                                                          str(dnsname),
                                                          str(dnsname),
                                                          timeout=5)
                    smbconn.login(self.domuser, self.passwd)
                    dirs = smbconn.listShares()
                    self.smbBrowseable[str(dnsname)] = {}
                    for share in dirs:
                        self.smbBrowseable[str(dnsname)][str(
                            share['shi1_netname']).rstrip('\0')] = ''
                        try:
                            _ = smbconn.listPath(
                                str(share['shi1_netname']).rstrip('\0'), '*')
                            self.smbBrowseable[str(dnsname)][str(
                                share['shi1_netname']).rstrip('\0')] = True
                        except (SessionError, UnicodeEncodeError,
                                NetBIOSError):
                            # Didn't have permission, all good
                            # Second-guessing whether to add the below to the JSON file, since we're only interested in the listable directories
                            #self.smbBrowseable[str(dnsname)][str(share['shi1_netname']).rstrip('\0')] = False
                            continue
                    smbconn.logoff()
                    progBar.update(prog + 1)
                    prog += 1
                except (socket.error, NetBIOSTimeout, SessionError,
                        NetBIOSError):
                    # TODO: Examine why we sometimes get:
                    # impacket.smbconnection.SessionError: SMB SessionError: STATUS_PIPE_NOT_AVAILABLE
                    # on healthy shares. It seems to be reported with CIF shares
                    progBar.update(prog + 1)
                    prog += 1
                    continue
        except ValueError:
            # We reached end of progressbar, continue since we finish below
            pass
        progBar.finish()
        print('')

        availDirs = []
        for key, value in self.smbBrowseable.items():
            for _, v in value.items():
                if v:
                    availDirs.append(key)

        if len(self.smbShareCandidates) == 1:
            print(
                '[ ' + colored('OK', 'green') +
                ' ] Searched {0} share and {1} host(s) with {2} subdirectories/files browseable by {3}'
                .format(len(self.smbShareCandidates),
                        len(self.smbBrowseable.keys()), len(availDirs),
                        self.domuser))
        else:
            print(
                '[ ' + colored('OK', 'green') +
                ' ] Searched {0} shares and {1} host(s) with {2} subdirectories/files browseable by {3}'
                .format(len(self.smbShareCandidates),
                        len(self.smbBrowseable.keys()), len(availDirs),
                        self.domuser))
        if len(self.smbBrowseable.keys()) > 0:
            with open('{0}-open-smb.json'.format(self.server), 'w') as f:
                json.dump(self.smbBrowseable, f, indent=4, sort_keys=False)
            print('[ ' + colored('OK', 'green') +
                  ' ] Wrote browseable shares to {0}-open-smb.json'.format(
                      self.server))
Exemple #25
0
def NodeDic(results, edge_info, node_info):
    '''
    Function takes the results of running a query, NETS edge label information, and a list of node information (list[0]
    contains the NETS nodes label triples, list[1] contains the NETS nodes identifier triples). The function returns a
    list of dictionaries where list[0] contains a nested dictionary where keys are bio entity identifiers and the
    values are the human readable labels and database identifiers; list[1] contains a dictionary where the bio node is
    the key and the value is a set of possible NETS node types for that node.
    :param results: json file containing the query results from endpoint
    :param edge_info: dictionary where the keys are the NETS edges and the values are the edge labels
    :param node_info: a list of node information (list[0] contains the NETS nodes label triples, list[1] contains the
    NETS nodes identifier triples)
    :return: a list of dictionaries: list[0] contains a nested dictionary where keys are bio entity identifiers and the
    values are the human readable labels and database identifiers; list[1] contains a dictionary where the bio node is
    the key and the value is a set of possible NETS node types for that node
    '''

    print 'Start building OWL-NETS metadata dictionary'

    # creates a map to store NETS node type information
    node_type = {}

    # creates a map to identify which query variables represent the BIO world ID, label, and ICE ID
    node_labeler = {}

    # assign variables needed for node dictionary
    NETS = set([x.strip('?') for y in edge_info[0].keys() for x in y])
    labels = [[re.sub('[?|"\n"]', '', x.split(' ')[0]), re.sub('[?|"\n"]', '', x.split(' ')[2])] for x in node_info[0]]
    ids = [[x.split(' ')[0].strip('?'), x.split(' ')[2].strip('?')] for x in node_info[1]]

    # initialize progress bar
    widgets = [Percentage(), Bar(), FormatLabel('(elapsed: %(elapsed)s)')]
    pbar = ProgressBar(widgets=widgets, maxval=len(NETS))

    for node in pbar(NETS):
        node_labeler[node] = {}

        for res in results['results']['bindings']:
            node_key = str(res[node]['value'])
            label_value = str([x[1] for x in labels if x[0] == node][0].encode('utf8'))
            id_value = str([x[0] for x in ids if x[1] == node][0].encode('utf8'))

            # NODE TYPE: setting node type information
            if node_key in node_type.keys():
                node_type[node_key].add(node)

            else:
                node_type[node_key] = set()
                node_type[node_key].add(node)

            # NODE METADATA: setting node attributes by NETS node type
            if node_key in node_labeler[node].keys():
                # order matters - not using a set so that each ICE can be mapped to the label with the same index
                node_labeler[node][node_key]['label'].append(res[label_value]['value'].encode('utf8'))
                node_labeler[node][node_key]['id'].append(res[id_value]['value'].encode('utf8'))

            else:
                node_labeler[node][node_key] = {}
                node_labeler[node][node_key]['label'] = [res[label_value]['value'].encode('utf8')]
                node_labeler[node][node_key]['id'] = [res[id_value]['value'].encode('utf8')]

    # close progress bar
    pbar.finish()
    print 'Finished building OWL-NETS metadata dictionary'
    print '\n'

    # CHECK: verify that the counts are correct
    for node in NETS:
        res_count = set()
        for res in results['results']['bindings']:
            res_count.add(res[node]['value'])

        if len(node_labeler[node].keys()) != len(res_count):  # verify the number of nodes in graph is correct
            raise ValueError('The count of results for the ' + str(node) + ' NETS node in the node dictionary differs '
                                                                           'from the query output')

    return node_labeler, node_type
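
An illustrative sketch of the two returned structures (the keys and values below are placeholders):

    node_labeler, node_type = NodeDic(results, edge_info, node_info)
    # node_labeler: {NETS node variable: {BIO node URI: {'label': [...], 'id': [...]}}}
    # node_type:    {BIO node URI: set of NETS node variables it was bound to}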
Exemple #26
0
    def sample(self, nsample=1000, from_estimate=False, test=False):

        if (nsample == 1) and (from_estimate == False):
            raise ValueError('for nsample = 1, use .estimate() method!')
        elif (1 < nsample < 100) and (test == False):
            raise ValueError('need at least 100 samples!')
        elif (1 < nsample < 100) and (test == True):
            print 'not enough samples, remember to only ' + \
                'use this as a test-bed!'

        # increasing sample by 10% to ensure
        # robustness against rejected samples
        nsample = int(nsample)

        # set up a dictionary to store tables of relevant data for each spaxel
        res_d = {}

        tfcnames = [
            k for k in self.flux.colnames
            if len(self.flux[k]) > np.isnan(self.flux[k]).sum()
        ]
        self.tfcnames = tfcnames

        #looping over nm measurements
        pbar = ProgressBar(widgets=[Percentage(), Bar(),
                                    ETA()],
                           maxval=self.nm).start()

        for i in range(self.NM0, self.nm):
            blockPrint()
            galnum = self.flux['galnum'][i]
            fr = self.flux[i]
            er = self.err[i]

            fluxi = {
                k: np.random.normal(fr[k], er[k], nsample)
                for k in fr.colnames
                if ((k != 'galnum') and (~np.isnan(fr[k])))
            }

            # set up a table for a given galnum
            res_d[galnum] = t.Table()
            # add a column for flux information
            for n in tfcnames:
                if (n != 'galnum'):
                    if (np.isnan(self.flux[n]).sum() != len(self.flux[n])):
                        res_d[galnum][n] = fluxi[n]
                        res_d[galnum][n].unit = u.Unit('1e-17 erg cm^-2 s^-1')

            scales = ms.diagnostics(nsample, None, self.nps)

            with warnings.catch_warnings():
                warnings.simplefilter('ignore')
                success = metallicity.calculation(scales,
                                                  fluxi,
                                                  self.nm,
                                                  'all',
                                                  1,
                                                  self.logf,
                                                  disp=self.verbose,
                                                  dust_corr=self.dust_corr,
                                                  verbose=self.verbose)
            if success == -1:
                raise ValueError('MINIMUM REQUIRED LINES:  [OII]3727 ' + \
                    '& [OIII] + 5007, or [NII]6584, and Ha & Hb if ' + \
                    'you want dereddening')

            for k, v in scales.mds.iteritems():
                if type(v) == np.ndarray:
                    if np.isnan(v).sum() != len(v):
                        res_d[galnum][k] = v

            enablePrint()
            pbar.update(i)

        pbar.finish()

        self.res_d = res_d
        self.nsample = nsample
        self.Zdiags = res_d[galnum].colnames
Exemple #27
0
semester_codes={'0':'INTERIM', 
                '1':'SPRING', 
                '6':'SUMMER', 
                '9':'FALL'}

default_pickup = {'GEN' : 'MUSME',
                  'BUS' :'BUS',
                  'MM' : 'MUSME',
                  'HEALTH' : 'HLTH',
                  'OXF' :'OXFD',
                  'CHEM' : 'CHEM',
                  'THE' : 'THEO',
                  'LAW' : 'LAW'}

# widget for progress bar
pbar_widget = [Percentage(), ' ', ETA(),  Bar()]


def unnone(value):
    return value if value is not None else ''



# get notes by type for later reference so it will not take 2 hours to run 82,000 separate queries
def get_notes(type, sep='; '):

    query = ''' SELECT n.target_id id, IFNULL(group_concat(n.note separator %s), '') notes
                FROM notes n
                WHERE n.type = %s
                GROUP BY n.target_id '''
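
A hypothetical continuation of this helper, assuming a MySQLdb/pymysql-style connection named conn (those drivers substitute %s parameters as quoted literals, which GROUP_CONCAT's SEPARATOR accepts):

    cursor = conn.cursor()
    cursor.execute(query, (sep, type))
    # map target_id -> concatenated notes string
    return {row[0]: row[1] for row in cursor.fetchall()}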
Exemple #28
0
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        if hasattr(sys, "frozen"):
            resp = request.urlopen(
                "https://api.github.com/repos/xKynn/PathOfExileRPC/releases/latest"
            )
            data = json.load(resp)
            info = win32api.GetFileVersionInfo('launcher.exe', "\\")
            ms = info['FileVersionMS']
            ls = info['FileVersionLS']
            version = "%d.%d.%d.%d" % (win32api.HIWORD(ms), win32api.LOWORD(
                ms), win32api.HIWORD(ls), win32api.LOWORD(ls))
            latest_ver = parse_version(data['tag_name'])
            current_ver = parse_version(version)
            download_url = data["assets"][0]["browser_download_url"]
            if latest_ver > current_ver:
                print("Found a newer release, would you like to update? (y/n)")
                reply = input()
                if reply.startswith("n"):
                    sys.exit()
                print("Starting Update Process")
                print(f"Update Notes: {data['body']}")
                if not os.path.isdir(
                        os.path.join(os.path.dirname(sys.executable),
                                     'updates')):
                    os.mkdir(
                        os.path.join(os.path.dirname(sys.executable),
                                     'updates'))

                widgets = [
                    f'{data["assets"][0]["name"]}: ',
                    Percentage(), ' ',
                    Bar(marker=RotatingMarker()), ' ',
                    ETA(), ' ',
                    FileTransferSpeed()
                ]
                pbar = ProgressBar(widgets=widgets)

                def dl_progress(count, blockSize, totalSize):
                    if pbar.maxval is None:
                        pbar.maxval = totalSize
                        pbar.start()

                    pbar.update(min(count * blockSize, totalSize))

                request.urlretrieve(download_url,
                                    os.path.join(
                                        os.path.dirname(sys.executable),
                                        'updates', data["assets"][0]["name"]),
                                    reporthook=dl_progress)
                atexit.register(os.execl, "updater.exe", "updater.exe")
                sys.exit()
        try:
            with open('config.json') as f:
                js = json.load(f)
        except:
            js = {"name": "", "private": False, "sessid": ""}
        if not js['name']:
            js['name'] = input(
                "Please enter your path of exile account name: ")
            while 1:
                reply = input(
                    "Is your path of exile profile private or is character tab hidden? (y/n): "
                )
                if reply in ["y", "n"]:
                    break
            if reply == "y":
                while 1:
                    sessid = input("Input your POESESSID here: ")
                    confirm = input("Confirm? (y/n)")
                    if confirm in ["y", "n"]:
                        if confirm == "n":
                            continue
                        else:
                            break
                js['sessid'] = sessid
                js['private'] = True
            else:
                js['private'] = False
            if (Path().cwd() / "launcher.exe").is_file():
                while 1:
                    rep = input(
                        "Would you like to setup PathOfExileRPC to start on startup? "
                        "It will start in the background without a window. (y/n)"
                    )
                    if rep in ["y", "n"]:
                        break
                if rep == "y":
                    user = os.getlogin()
                    with open('launcher.vbs', 'w') as f:
                        f.write('Set oShell = CreateObject ("Wscript.Shell")\n'
                                'Dim strArgs\n'
                                f'strArgs = "{Path().cwd()}\\launcher.exe"\n'
                                'oShell.Run strArgs, 0, false')
                    with open('poestartup.bat', 'w') as f:
                        f.write(
                            f"{Path().cwd().as_posix()}/launcher.vbs\nexit")
                    found = False
                    p = Path(
                        shell.SHGetFolderPath(0, shellcon.CSIDL_STARTUP, 0, 0))
                    print(p)
                    if p.is_dir():
                        found = True
                    if not found:
                        print(
                            "The startup folder could not be located, you can set this up manually by:\n"
                            "1. Copy the newly created poestartup.bat file in this directory\n"
                            "2. Hold down the windows key and press R, in this window type in shell:startup\n"
                            "3. In the opened folder paste the file you copied earlier"
                        )
                    else:
                        cp('poestartup.bat', f"{p.as_posix()}/poestartup.bat")
                        print(
                            "Done! PathOfExileRPC will now startup when you log into windows."
                        )
            print(
                "Setup is done and your settings will be saved, to go through "
                "setup again just delete the file called config.json")
            with open('config.json', 'w') as f:
                json.dump(js, f)

        self.loop = asyncio.ProactorEventLoop()
        cookies = None
        if js['private']:
            cookies = {'POESESSID': js['sessid']}

        self.cl = PoeRPC(self.loop, js['name'], cookies, self.logger)
Exemple #29
0
    def _do_percentile(self, data, spectrograms, name):

        pbar = None
        try:
            from progressbar import ProgressBar, Percentage, Bar
            pbar = ProgressBar(widgets=[Percentage(), Bar()],
                               maxval=len(data.track_names)*101)
            pbar.start()
        except Exception as e:
            pass

        k = 0
        if config.learn_phase:
            y_real = [[] for _ in range(101)]
            y_imag = [[] for _ in range(101)]
            for track in sorted(data.track_names):
                t = data.prepare_spectrogram(spectrograms[track])
                median_real = np.median(t[:, :, 0])
                median_imag = np.median(t[:, :, 1])
                for i in range(101):
                    if pbar is not None:
                        pbar.update(k)
                    k += 1
                    v = np.percentile(t[:, :, 0], i)
                    y_real[i].append(v-median_real)

                    v = np.percentile(t[:, :, 1], i)
                    y_imag[i].append(v-median_imag)

            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            h5f_path = os.path.join(self.analysisPath,
                                    "ir_percentile_%s.hdf5" % name)
            h5f = h5py.File(h5f_path, "w")
            h5f.create_dataset(name="real",
                               data=y_real)
            h5f.create_dataset(name="imag",
                               data=y_imag)
            h5f.close()

            plt.figure(figsize=(15, 15))
            plt.subplot(211)
            result = plt.boxplot(y_real, labels=range(101))
            print([l.get_ydata()[0] for l in result["medians"]])
            plt.xticks(rotation=90)
            plt.title("Real")
            plt.xlabel("percentile")
            plt.ylabel("difference from median")

            plt.subplot(212)
            result = plt.boxplot(y_imag, labels=range(101))
            print([l.get_ydata()[0] for l in result["medians"]])
            plt.xticks(rotation=90)
            plt.title("Imag")
            plt.xlabel("percentile")
            plt.ylabel("difference from median")
            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            plt.savefig(os.path.join(self.analysisPath,
                                     "percentile_%s_ir.png" % name))
            plt.close()
        else:
            y = [[] for _ in range(101)]
            for track in data.track_names:
                t = data.prepare_spectrogram(spectrograms[track])
                median = np.median(t)
                for i in range(101):
                    if pbar is not None:
                        pbar.update(k)
                    k += 1
                    v = np.percentile(t, i)
                    y[i].append(v-median)

            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            h5f_path = os.path.join(self.analysisPath,
                                    "amp_percentile_%s.hdf5" % name)
            h5f = h5py.File(h5f_path, "w")
            h5f.create_dataset(name="value",
                               data=y)
            h5f.close()

            plt.figure(figsize=(15, 15))
            result = plt.boxplot(y, labels=range(101))
            print([l.get_ydata()[0] for l in result["medians"]])
            plt.xticks(rotation=90)
            plt.title("Amplitude")
            plt.xlabel("percentile")
            plt.ylabel("difference from median")
            if not os.path.exists(self.analysisPath):
                os.mkdir(self.analysisPath)
            plt.savefig(os.path.join(self.analysisPath,
                                     "percentile_%s_amplitude.png" % name))
            plt.close()
Exemple #30
0
def main():

    transferLearning = False
    fineTuning = False
    cuda = True
    model_save_interval = 1000
    image_save_interval = 1000
    update_interval = 85
    log_interval = 100

    testingAccuracyList = []
    trainingAccuracyList = []
    trainingLossList = []
    testingLossList = []
    modelNumList = []
    #

    epoch_size = 30
    batch_size = 50

    result_path = "transfer2_toxicity_classifier_results"
    model_path = "transfer2_toxicity_classifier"
    #     saved_model_path = "toxicity_classifier_models"

    saved_dis_A = "model_dis-14.0"

    # unload the data files
    train, trainLabels, test, testLabels, feature, featureLabels = get_data()

    np.save("train", train)
    np.save("trainLabels", trainLabels)
    np.save("test", test)
    np.save("testLabels", testLabels)
    np.save("feature", feature)
    np.save("featureLabels", featureLabels)

    train = Variable(torch.FloatTensor(train))
    trainLabels = Variable(torch.FloatTensor(trainLabels))

    test = Variable(torch.FloatTensor(test))
    testLabels = Variable(torch.FloatTensor(testLabels))

    feature = Variable(torch.FloatTensor(feature))
    featureLabels = Variable(torch.FloatTensor(featureLabels))

    # Initialize Learning Network
    discriminator = Discriminator()

    if transferLearning or fineTuning:
        device = None

        #         saved_dis_A_path = os.path.join(saved_model_path, saved_dis_A)

        if not cuda:
            device = torch.device('cpu')

            dis_A_state_dict = torch.load("transfer1_model_dis-14.0",
                                          map_location="cpu")

        else:
            device = torch.device('cuda')

            dis_A_state_dict = torch.load("transfer1_model_dis-14.0")

        # obtain the state dictionary of a previously trained model

        discriminator.load_state_dict(dis_A_state_dict, strict=False)

        # send dictionary to device

        discriminator.to(device)

    # Enable GPUs
    if cuda:
        train = train.cuda()
        test = test.cuda()
        feature = feature.cuda()
        discriminator = discriminator.cuda()

    data_size = len(train)
    n_batches = (data_size // batch_size)

    # Set up loss function
    dis_criterion = nn.BCELoss()

    # Obtain parameters to pass to optimiser
    dis_params = discriminator.parameters()

    # Setting up gradient descent (optimiser, using the Adam algorithm)
    optim_dis = optim.Adam(dis_params,
                           lr=0.000005,
                           betas=(0.5, 0.999),
                           weight_decay=0.000007)

    iters = 0

    for epoch in range(epoch_size):
        # Shuffle the order of all the data

        train, trainLabels = shuffle_data(train, trainLabels)

        # Progression bar
        widgets = ['epoch #%d|' % epoch, Percentage(), Bar(), ETA()]
        pbar = ProgressBar(maxval=n_batches, widgets=widgets)
        pbar.start()

        # for each batch
        for i in range(n_batches - 1):

            pbar.update(i)

            # Reset gradients
            discriminator.zero_grad()

            # Get the batches
            batch, batchLabels = getBatch(
                train, trainLabels, i, batch_size
            )  # returns a batch of shape (batch_size, in_channels, height, width)

            # Enable GPUs
            if cuda:
                batch = batch.cuda()
                batchLabels = batchLabels.cuda()

            trainingClassifications = discriminator(
                batch,
                epoch)  # classifier outputs for this training batch
            trainingLoss = get_dis_loss(trainingClassifications, batchLabels,
                                        dis_criterion, cuda)

            # UPDATE EDGES BASED ON LOSSES *****************************************************

            trainingLoss.backward()
            optim_dis.step()

            if iters % log_interval == 0:

                if cuda:
                    test = test.cuda()
                    testLabels = testLabels.cuda()

                startIndex, stopIndex = getStartStop(testLabels)

                testingClassifications = discriminator(
                    test[startIndex:stopIndex], 0)

                testingLoss = get_dis_loss(testingClassifications,
                                           testLabels[startIndex:stopIndex],
                                           dis_criterion, cuda)
                testingAccuracy = getAccuracy(testingClassifications,
                                              testLabels[startIndex:stopIndex])

                modelNum = iters / model_save_interval

                print()
                print("---------------------")
                print("Model Number: " + str(modelNum))
                modelNumList.append(modelNum)
                print("Training Loss:", as_np(trainingLoss.mean()))
                trainingLossList.append(as_np(trainingLoss.mean()))
                print("Training Accuracy:",
                      getAccuracy(trainingClassifications, batchLabels))
                trainingAccuracyList.append(
                    getAccuracy(trainingClassifications, batchLabels))
                print("Testing Loss:", as_np(testingLoss.mean()))
                testingLossList.append(as_np(testingLoss.mean()))
                print("Testing Accuracy: ", testingAccuracy)
                testingAccuracyList.append(testingAccuracy)

            # save models at the save interval

            if iters % model_save_interval == 0:

                #                 if os.path.exists(model_subdir_path):
                #                     pass
                #                 else:
                #                     os.makedirs(model_subdir_path)

                torch.save(
                    discriminator.state_dict(),
                    os.path.join('transfer2_model_dis-' +
                                 str(iters / model_save_interval)))

            iters += 1
    print("assigningDictionary")
    dictionary = {
        "TrainingLoss": trainingLossList,
        "TrainingAccuracy": trainingAccuracyList,
        "TestingLoss": testingLossList,
        "TestingAccuracy": testingAccuracyList
    }
    print(dictionary)

    import pickle
    with open("transferDataDict.pickle", "wb") as outFile:
        pickle.dump(dictionary, outFile)

    df = pd.DataFrame(dictionary)
    print(df)
    df.plot.line()