Ejemplo n.º 1
0
def draw_histogram(args):
    """Plot same-cluster vs. different-cluster distance histograms and save them.

    Distances are loaded from ``args.pairwise_dist_file`` through
    ``load_pairwise_dist``. For every selected distance column two 50-bin
    step histograms are drawn on the same axes: the values stored under key
    ``(0, col)`` (labelled 'same cluster') and those under ``(1, col)``
    (labelled 'different cluster'). With more than one column each column
    gets its own subplot row. The figure is written to ``args.histogram_fig``.

    Args:
        args: namespace providing ``pairwise_dist_file``, ``dist_cols``
            ('na' selects every column, otherwise a comma-separated list of
            column indices), ``normalized`` (forwarded to ``hist`` as
            ``normed``) and ``histogram_fig`` (output path).
    """
    logPrint('[draw_histogram] starts')

    # load data
    dist_label_dic, pairwise_dist_dic = load_pairwise_dist(
        args.pairwise_dist_file)
    if args.dist_cols == 'na':
        dist_cols = list(dist_label_dic.keys())
    else:
        dist_cols = [
            int(col) for col in args.dist_cols.strip().split(',') if col != ''
        ]

    def _plot_column(ax, col):
        # Draw both histograms of distance column `col` onto `ax`.
        # NOTE(review): `normed` is kept as in the original call; newer
        # matplotlib releases spell this keyword `density` -- confirm the
        # installed version before switching.
        for tp, color, label in ((0, 'green', 'same cluster'),
                                 (1, 'red', 'different cluster')):
            ax.hist(pairwise_dist_dic[(tp, col)],
                    50,
                    normed=args.normalized,
                    facecolor=color,
                    histtype='step',
                    label=label)
        ax.set_title('%s' % dist_label_dic[col])
        ax.legend()

    # draw histogram
    if len(dist_cols) > 1:
        fig, axes = plt.subplots(nrows=len(dist_cols))
        # Pair subplots with columns by position: column ids need not be
        # 0..n-1 (e.g. --dist_cols 2,5), so they cannot index `axes`.
        for ax, col in zip(axes, dist_cols):
            _plot_column(ax, col)
    else:
        fig, ax = plt.subplots()
        # Title and data both come from the selected column, not column 0.
        _plot_column(ax, dist_cols[0])

    # show/save all
    plt.tight_layout()
    plt.savefig(args.histogram_fig)

    logPrint('[draw_histogram] finished. %s written' % args.histogram_fig)

    return
Ejemplo n.º 2
0
def siamese_seq2seq_data_pretrain(param_dic, args, data_dir):
    """Run ``simulate_data.py gen_seq2seq`` to produce the pre-training pairs.

    The main pair file is written to ``<data_dir>/pretrain/seq2seq_cgk.txt``.
    When ``args.purpose`` is 'train' and ``param_dic['n_clusters_validation']``
    is greater than 0, a second file ``seq2seq_cgk_validation.txt`` is
    generated in the same directory.

    Args:
        param_dic: parameter dictionary; reads 'seq_type', 'seq2seq_type',
            'n_clusters', 'cluster_len' and 'n_clusters_validation'.
        args: namespace providing ``purpose``.
        data_dir: directory under which the ``pretrain`` files are placed.
    """
    logPrint('siamese_seq2seq_data_pretrain')

    def _gen_seq2seq_cmd(out_path, num_key):
        # Assemble one gen_seq2seq command; `num_key` names the param_dic
        # entry that supplies --num.
        return ''.join([
            'python simulate_data.py gen_seq2seq ',
            '--output %s ' % out_path,
            '--seq_type %s ' % param_dic['seq_type'],
            '--seq2seq_type %s ' % param_dic['seq2seq_type'],
            '--num %s ' % param_dic[num_key],
            '--length %s ' % param_dic['cluster_len'],
        ])

    # seq2seq_cgk of seq2seq architecture
    train_pairs = '%s/pretrain/seq2seq_cgk.txt' % data_dir
    run_cmd(_gen_seq2seq_cmd(train_pairs, 'n_clusters'))

    wants_validation = (args.purpose == 'train'
                        and param_dic['n_clusters_validation'] > 0)
    if not wants_validation:
        return

    # generate validation data
    validation_pairs = '%s/pretrain/seq2seq_cgk_validation.txt' % data_dir
    run_cmd(_gen_seq2seq_cmd(validation_pairs, 'n_clusters_validation'))

    return
Ejemplo n.º 3
0
def export_embedding(args):
    """Write the embedding of every sequence in a FASTA file to a text file.

    A ``Predict`` object is built from ``args.seq_type`` and
    ``args.model_prefix``; the sequences of ``args.input_fa`` are transformed
    with ``seq2nn`` and each one is embedded via ``get_embed``. For every
    sequence two lines are written to ``args.embed_output``: ``>`` followed by
    the sequence description, then the flattened embedding as comma-separated
    values.

    Args:
        args: namespace providing ``seq_type``, ``model_prefix``,
            ``input_fa`` and ``embed_output``.
    """
    logPrint('[export_embedding] starts')

    predictor = Predict(args.seq_type, args.model_prefix)
    converter = seq2nn(predictor.seq_type, predictor.maxlen,
                       predictor.blocklen)
    records = converter.transform_seqs_from_fa(args.input_fa)
    n_records = len(records)

    with open(args.embed_output, 'w') as fout:
        progress = iterCounter(n_records, 'export_embedding')

        for record in records:
            progress.inc()

            flat_embed = predictor.get_embed(record.tseq).flatten()
            values = ','.join(map(str, flat_embed))

            fout.write('>%s\n%s\n' % (record.description, values))

        progress.finish()

    logPrint('[export_embedding] finished. %s written.' % args.embed_output)

    return
Ejemplo n.º 4
0
 def _handle_events(self, events):
     """Dispatch each event to the handler registered under its name.

     An event is a sequence whose first item is the event name and whose
     remaining items, if any, are passed to the handler as positional
     arguments. Events without a registered handler are logged as
     unhandled. A ``TypeError`` raised while calling a handler with
     arguments is logged at debug level instead of propagating.
     """
     for event in events:
         name = event[0]

         if name not in self.event_handlers:
             util.logPrint('Unhandled event: %s' % event)
             continue

         handler = self.event_handlers[name]
         if len(event) <= 1:
             # name only: the handler takes no arguments
             handler()
             continue

         try:
             handler(*event[1:])
         except TypeError:
             util.logPrint((name, event, self.event_handlers), level='debug')
Ejemplo n.º 5
0
 def event(self):
     """Poll the events URL once for this client.

     Posts the client id to ``EVENTS_URL % self.server`` through
     ``self.browser`` and parses the response as JSON. While the state is
     not 'CONNECTED', ``util.WAIT_TIMEOUT`` is passed as an extra positional
     argument to ``browser.open`` (presumably a timeout -- confirm against
     the browser API).

     If the request raises ``URLError``, the method returns; when a first
     topic is set and the error reason is 'timed out', it additionally logs
     that nobody with that interest was found and stops the client.
     """
     url = self.EVENTS_URL % self.server
     data = {'id': self.client_id}
     try:
         if self.state == 'CONNECTED':
             response = self.browser.open(url, urllib.urlencode(data))
         else:
             response = self.browser.open(url, urllib.urlencode(data), util.WAIT_TIMEOUT)
         # NOTE(review): the parsed payload is not used further in the
         # visible part of this method.
         data = json.load(response)
     except URLError as e:  # `as` form is valid on Python 2.6+ and Python 3
         if (self.topics[0] is not None) and (e.reason.message == 'timed out'):
             util.logPrint('Failed to find anyone with interest ' + self.topics[0])
             self.stop()
             # TODO: send stoplookingforcommonlikes to omegle server
         return
Ejemplo n.º 6
0
def batch_test_train_1job(input_args):
    """Run one training job by invoking ``train.py use_simulate_data``.

    Creates ``<root_dir>/<data_label>/train/data/`` and the per-job model
    directory ``<root_dir>/<data_label>/train/<model_label>/<param_desc>/``,
    copies ``args.config_file`` to ``config.txt`` in the model-label
    directory, then launches the training command with the hyper-parameters
    taken from ``param_dic``.

    Args:
        input_args: tuple ``(param_dic, param_desc, args)``, packed into one
            object so the job can be run in parallel.
    """
    param_dic, param_desc, args = input_args

    tag = '[train_1job_%s] ' % param_desc
    logPrint(tag + 'Start')

    root_dir = param_dic['root_dir']
    data_label = param_dic['data_label']
    model_label = param_dic['model_label']

    # Create the working directories.
    data_dir = '%s/%s/train/data/' % (root_dir, data_label)
    model_dir = '%s/%s/train/%s/%s/' % (root_dir, data_label, model_label,
                                        param_desc)
    for directory in (data_dir, model_dir):
        run_cmd('mkdir -p %s' % directory)
        logPrint('%s created' % directory)

    # The config copy is unconditional: an existing config.txt is overwritten.
    dst_config = '%s/%s/train/%s/config.txt' % (root_dir, data_label,
                                                model_label)
    run_cmd('cp %s %s' % (args.config_file, dst_config))

    sample_fa = '%s/sample.fa' % data_dir
    sample_dist = '%s/sample.dist' % data_dir

    train_cmd = ''.join([
        'python train.py use_simulate_data ',
        '--input_type                 1 ',
        '--train_input1               %s ' % sample_fa,
        '--train_input2               %s ' % sample_dist,
        '--seq_type                   %s ' % param_dic['seq_type'],
        '--train_output_dir           %s ' % model_dir,
        '--max_num_to_sample          %s ' % param_dic['max_num_to_sample'],
        '--batch_size                 %s ' % param_dic['batch_size'],
        '--num_epochs                 %s ' % param_dic['num_epochs'],
        '--load_model                 %s ' % param_dic['load_model'],
        '--maxlen                     %s ' % param_dic['maxlen'],
        '--blocklen                   %s ' % param_dic['blocklen'],
        '--embedding_size             %s ' % param_dic['embedding_size'],
        '--num_layers                 %s ' % param_dic['num_layers'],
        '--hidden_sz                  %s ' % param_dic['hidden_sz'],
        '--learning_rate              %s ' % param_dic['learning_rate'],
        '--dropout                    %s ' % param_dic['dropout'],
    ])
    run_cmd(train_cmd)

    logPrint(tag + 'End')

    return
Ejemplo n.º 7
0
def main(topic):
    """Start two clients on the same topic and register each with the other.

    The first client is started and waited for via ``util.waitForClient``
    before the second one is started. The function then polls until neither
    client is alive (or Ctrl-C is pressed while joining) and stops both.

    Args:
        topic: the single topic handed to both clients.
    """
    util.initLogs()

    first = Client(event_delay=1, topics=[topic], name='Stranger 1')
    second = Client(event_delay=1, topics=[topic], name='Stranger 2')
    first.register_other_client(second)
    second.register_other_client(first)

    first.start()
    util.waitForClient(first, topic)
    second.start()

    while True:
        if not (first.isAlive() or second.isAlive()):
            break
        try:
            # short joins keep the loop responsive to KeyboardInterrupt
            first.join(0.1)
            second.join(0.1)
        except KeyboardInterrupt:
            break

    util.logPrint('Disconnecting... ')
    first.stop()
    second.stop()
Ejemplo n.º 8
0
def main(topic):
    """Run a pair of mutually registered clients until both have finished.

    Both clients are created for ``topic``. The second one is started only
    after ``util.waitForClient`` returns for the first. A KeyboardInterrupt
    raised while joining ends the wait early; in every case both clients
    are stopped at the end.

    Args:
        topic: the single topic handed to both clients.
    """
    util.initLogs()

    stranger_a = Client(event_delay=1, topics=[topic], name='Stranger 1')
    stranger_b = Client(event_delay=1, topics=[topic], name='Stranger 2')
    stranger_a.register_other_client(stranger_b)
    stranger_b.register_other_client(stranger_a)
    pair = (stranger_a, stranger_b)

    stranger_a.start()
    util.waitForClient(stranger_a, topic)
    stranger_b.start()

    interrupted = False
    while not interrupted and (stranger_a.isAlive() or stranger_b.isAlive()):
        try:
            for stranger in pair:
                stranger.join(0.1)
        except KeyboardInterrupt:
            interrupted = True

    util.logPrint('Disconnecting... ')
    for stranger in pair:
        stranger.stop()
Ejemplo n.º 9
0
def batch_test_train(args):
    """Run one training job per parameter combination of a config file.

    ``args.config_file`` is parsed into ``(param_desc, param_dic)`` pairs.
    ``args.a`` selects the per-job routine (0: siamese, 1: seq2seq,
    2: siamese seq2seq). With ``args.N == 1`` the jobs run sequentially in
    this process; otherwise they are mapped over a ``multiprocessing.Pool``
    of ``args.N`` workers. An unrecognised ``args.a`` runs nothing and is
    logged.

    Args:
        args: namespace providing ``config_file``, ``N`` and ``a``.
    """
    logPrint('[batch_test_train] Start')

    config_desc_dic_list = parse_config_file(args.config_file)

    logPrint('[batch_test_train] %s parsed' % args.config_file)

    # architecture selector -> routine that runs a single job
    job_by_arch = {
        0: batch_test_train_1job,  # siamese
        1: batch_test_train_1job_seq2seq,  # seq2seq
        2: batch_test_train_1job_siamese_seq2seq,  # siamese seq2seq
    }
    job = job_by_arch.get(args.a)

    # Each job takes one packed tuple so the same callable works with map().
    args_list = [(param_dic, param_desc, args)
                 for param_desc, param_dic in config_desc_dic_list]

    nJobs = args.N

    if job is None:
        logPrint('[batch_test_train] unknown architecture %s; no job run' %
                 (args.a, ))
    elif nJobs == 1:
        for job_args in args_list:
            job(job_args)
    else:
        pool = multiprocessing.Pool(nJobs)
        try:
            pool.map(job, args_list)
            pool.close()  # normal completion: let the workers exit
        except Exception:
            pool.terminate()  # a job failed: stop the remaining workers
            raise
        finally:
            pool.join()  # always reap the worker processes

    logPrint('[batch_test_train] End')

    return
Ejemplo n.º 10
0
def draw_roc(args):
    """Plot mis-detection against false positives over a threshold sweep.

    Distances are loaded from ``args.pairwise_dist_file``. For every selected
    distance column, ``args.n_thresholds`` evenly spaced thresholds are taken
    from the column's minimum (inclusive) towards its maximum (exclusive).
    At each threshold:

    * mis-detection counts same-cluster pairs (key ``(0, col)``) whose
      distance is greater than the threshold;
    * false positive counts different-cluster pairs (key ``(1, col)``) whose
      distance is smaller than the threshold.

    When ``args.normalized == 1`` both counts are divided by the number of
    pairs of their kind. One curve per column is drawn on a single axes and
    the figure is saved to ``args.roc_fig``.

    Args:
        args: namespace providing ``pairwise_dist_file``, ``dist_cols``
            ('na' or comma-separated column indices), ``n_thresholds``,
            ``normalized`` and ``roc_fig``.
    """
    logPrint('[draw_roc] starts')

    # load data
    dist_label_dic, pairwise_dist_dic = load_pairwise_dist(
        args.pairwise_dist_file)
    if args.dist_cols == 'na':
        dist_cols = list(dist_label_dic.keys())
    else:
        dist_cols = [
            int(col) for col in args.dist_cols.strip().split(',') if col != ''
        ]

    # draw roc
    N = args.n_thresholds
    fig, ax = plt.subplots()
    for i in dist_cols:
        same_dists = pairwise_dist_dic[(0, i)]
        diff_dists = pairwise_dist_dic[(1, i)]

        i_min = min(min(same_dists), min(diff_dists))
        i_max = max(max(same_dists), max(diff_dists))
        # Offset the sweep by i_min so it covers the observed range; without
        # the offset the thresholds span [0, i_max - i_min) and miss the top
        # of the range whenever the smallest distance is not 0.
        T_list = [
            i_min + float(i_max - i_min) / N * t_idx for t_idx in range(N)
        ]

        md_list = []
        fp_list = []
        for t_val in T_list:
            # same-cluster pair farther than the threshold: no edge, so a
            # mis-detection
            md = sum(1 for v in same_dists if v > t_val)
            # different-cluster pair closer than the threshold: an edge, so a
            # false positive
            fp = sum(1 for v in diff_dists if v < t_val)
            if args.normalized == 1:
                md = float(md) / len(same_dists)
                fp = float(fp) / len(diff_dists)
            md_list.append(md)
            fp_list.append(fp)

        ax.plot(fp_list, md_list, label=dist_label_dic[i], marker='.')
        logPrint('roc for %s done' % dist_label_dic[i])

    ax.set_xlabel('False Positive')
    ax.set_ylabel('Mis-detection')
    ax.legend()

    # show/save all
    plt.tight_layout()
    plt.savefig(args.roc_fig)

    logPrint('[draw_roc] finished. %s written' % args.roc_fig)

    return
Ejemplo n.º 11
0
def batch_test_train_1job_seq2seq(input_args):
    """Run one seq2seq training job in-process through ``train_seq2seq``.

    Creates the train data directory and the per-job model directory, copies
    ``args.config_file`` to ``config.txt`` in the model-label directory
    unless that file already exists, then extends ``param_dic`` in place with
    the paths ``train_seq2seq`` reads:

    * 'seq2seq_pair_path' and 'model_dir_path' always;
    * 'seq2seq_pair_path_validation' only when
      ``param_dic['n_clusters_validation'] > 0``.

    Args:
        input_args: tuple ``(param_dic, param_desc, args)``, packed into one
            object so the job can be run in parallel.
    """
    param_dic, param_desc, args = input_args
    tag = '[train_1job_seq2seq_%s] ' % param_desc

    logPrint(tag + 'Start')

    root_dir = param_dic['root_dir']
    data_label = param_dic['data_label']
    model_label = param_dic['model_label']

    # Create the working directories.
    data_dir = '%s/%s/train/data/' % (root_dir, data_label)
    model_dir = '%s/%s/train/%s/%s/' % (root_dir, data_label, model_label,
                                        param_desc)
    for directory in (data_dir, model_dir):
        run_cmd('mkdir -p %s' % directory)
        logPrint('%s created' % directory)

    # Keep an already existing config copy untouched.
    dst_config = '%s/%s/train/%s/config.txt' % (root_dir, data_label,
                                                model_label)
    if not os.path.exists(dst_config):
        run_cmd('cp %s %s' % (args.config_file, dst_config))

    # to be passed to train_seq2seq function
    param_dic['seq2seq_pair_path'] = '%s/seq2seq_cgk.txt' % data_dir
    param_dic['model_dir_path'] = model_dir

    if param_dic['n_clusters_validation'] > 0:
        validation_path = '%s/seq2seq_cgk_validation.txt' % data_dir
        param_dic['seq2seq_pair_path_validation'] = validation_path

    train_seq2seq(param_dic)

    logPrint(tag + 'End')

    return
Ejemplo n.º 12
0
def batch_test_eval(args):
    """Run ``batch_test_eval_1job`` for every parameter combination in turn.

    ``args.config_file`` is parsed into ``(param_desc, param_dic)`` pairs and
    the evaluation jobs are executed sequentially in this process.

    Args:
        args: namespace providing ``config_file``; it is forwarded unchanged
            to every job.
    """
    logLabel = '[batch_test_eval]'

    logPrint('%s Start' % logLabel)

    config_desc_dic_list = parse_config_file(args.config_file)

    logPrint('%s %s parsed' % (logLabel, args.config_file))

    # Jobs are packed as a single tuple, the form batch_test_eval_1job takes.
    for param_desc, param_dic in config_desc_dic_list:
        batch_test_eval_1job((param_dic, param_desc, args))

    logPrint('%s End' % logLabel)

    return
Ejemplo n.º 13
0
 def handle_recaptcha_required(self, challenge):
     """Record that a captcha is required, tell the user and stop.

     ``challenge`` is accepted but not used.
     """
     self.state = 'RECAPTCHA_REQUIRED'
     notices = (
         'Captcha required. Please go to www.omegle.com and enter the captcha manually.',
         'Disconnecting...',
     )
     for notice in notices:
         util.logPrint(notice)
     self.stop()
Ejemplo n.º 14
0
def load_pairwise_dist(pairwise_dist_file):
    """Load a whitespace-separated pairwise-distance file.

    Every data line holds at least three leading tokens followed by one
    value per distance metric. The third token (index 2) is parsed as an
    integer type and the remaining tokens as floats. Lines starting with
    '#' are skipped as comments; if the first line of the file starts with
    '#', its tokens from index 3 onwards are used as metric labels,
    otherwise the labels are the column indices as strings. Data lines in
    which any distance token equals 'nan' are dropped and counted. Blank
    lines are ignored.

    Args:
        pairwise_dist_file: path of the file to read.

    Returns:
        tuple ``(dist_label_dic, pairwise_dist_dic)`` where
        ``dist_label_dic`` maps metric index -> label and
        ``pairwise_dist_dic`` maps ``(type, metric index)`` -> list of float
        distances in file order.
    """
    logPrint('[load_pairwise_dist] starts')

    # First pass: read the first line for the labels and count the lines for
    # the progress counter. The file is closed before the second pass.
    with open(pairwise_dist_file, 'r') as fin:
        first_line = fin.readline()
        n_lines = (1 if first_line else 0) + sum(1 for _ in fin)

    # startswith() is safe on an empty file, where readline() returns ''.
    has_header = first_line.startswith('#')
    header_tokens = (first_line[1:]
                     if has_header else first_line).strip().split()

    dist_label_dic = {}
    for i in range(len(header_tokens) - 3):
        dist_label_dic[i] = header_tokens[i + 3] if has_header else str(i)

    pairwise_dist_dic = {}
    cnt_nan = 0

    iterCnt = iterCounter(n_lines, 'load_pairwise_dist')

    with open(pairwise_dist_file, 'r') as fin:

        for line in fin:

            iterCnt.inc()

            if line.startswith('#'):
                continue

            tokens = line.strip().split()
            if not tokens:
                # blank line: nothing to parse
                continue

            tp = int(tokens[2])
            dist_tokens = tokens[3:]

            # A single nan disqualifies the whole line.
            if 'nan' in dist_tokens:
                cnt_nan += 1
                continue

            for i, token in enumerate(dist_tokens):
                pairwise_dist_dic.setdefault((tp, i), []).append(float(token))

        iterCnt.finish()

    logPrint('[load_pairwise_dist] finished; %d lines contain nan' % cnt_nan)

    return dist_label_dic, pairwise_dist_dic
Ejemplo n.º 15
0
def batch_test_eval_1job(input_args):
    """Evaluate one trained parameter combination.

    A checkpoint is picked from the job's training directory with
    ``select_ckpt``; if none is found (empty ``ckpt_path``) the job ends
    early. Otherwise a per-checkpoint directory is created under the eval
    directory and the steps listed in ``args.tasks`` (comma-separated
    integers) are run:

    * 0 -- ``simulate_data.py calc_dist`` writing ``sample.dist``
    * 1 -- ``evaluation.py draw_histogram``
    * 2 -- ``evaluation.py draw_roc``
    * 3 -- ``evaluation.py export_embedding`` writing ``sample.embed.fa``

    All four command lines are built whether or not their task is selected,
    so every ``param_dic`` key they read must be present.

    Args:
        input_args: tuple ``(param_dic, param_desc, args)``, packed into one
            object so the job can be run in parallel.
    """
    param_dic, param_desc, args = input_args

    logLabel = '[eval_1job_%s] ' % param_desc

    logPrint(logLabel + 'Start')

    #---------- locations
    root_dir = param_dic['root_dir']
    data_label = param_dic['data_label']
    model_label = param_dic['model_label']

    data_dir = '%s/%s/eval/data/' % (root_dir, data_label)
    run_cmd('mkdir -p %s' % data_dir)
    logPrint('%s created' % data_dir)
    model_train_dir = '%s/%s/train/%s/%s/' % (root_dir, data_label,
                                              model_label, param_desc)
    model_eval_dir = '%s/%s/eval/%s/%s/' % (root_dir, data_label, model_label,
                                            param_desc)
    run_cmd('mkdir -p %s' % model_eval_dir)
    logPrint('%s created' % model_eval_dir)

    sample_fa = '%s/sample.fa' % data_dir

    #---------- select a trained model (i.e. a check point)
    ckpt_path, ckpt_name, step_loss_fn_list = select_ckpt(model_train_dir)

    if ckpt_path == '':
        logPrint(logLabel + 'End (no valid ckpt found)')
        return

    ckpt_dir = '%s/%s/' % (model_eval_dir, ckpt_name)
    run_cmd('mkdir -p %s' % ckpt_dir)
    logPrint('%s created' % ckpt_dir)
    sample_dist = '%s/sample.dist' % ckpt_dir

    #---------- proc eval job

    tasks = [int(t) for t in args.tasks.split(',') if t != '']

    # dist
    cmd = 'python simulate_data.py calc_dist '+\
                        '--distance_type_list %s '%param_dic['dist_tp_list_eval'] +\
                        '--seq_type %s '%param_dic['seq_type'] +\
                        '--seq_fa %s '%sample_fa +\
                        '--dist_out %s '%sample_dist+\
                        '--thread %d '%args.N +\
                        '--addheader %s '%param_dic['add_hd_eval'] +\
                        '--clear_intermediate %s '%param_dic['clear_interm_eval'] +\
                        '--model_prefix %s '%ckpt_path +\
                        '--max_num_dist_1thread %s '%param_dic['max_num_dist_1thread_eval']
    if 0 in tasks: run_cmd(cmd)

    # hist
    hist_fig_path = '%s/hist.norm_%s.png' % (ckpt_dir,
                                             param_dic['normalized_hist'])

    cmd = 'python evaluation.py draw_histogram '+\
                     '--pairwise_dist_file %s '%sample_dist +\
                     '--histogram_fig %s '%hist_fig_path +\
                     '--dist_cols %s '%param_dic['dist_cols'] +\
                     '--normalized %s '%param_dic['normalized_hist']
    if 1 in tasks: run_cmd(cmd)

    # roc
    roc_fig_path = '%s/roc.norm_%s.png' % (ckpt_dir,
                                           param_dic['normalized_roc'])

    cmd = 'python evaluation.py draw_roc '+\
                     '--pairwise_dist_file %s '%sample_dist +\
                     '--roc_fig %s '%roc_fig_path +\
                     '--n_thresholds %s '%param_dic['n_thresholds'] +\
                     '--normalized %s '%param_dic['normalized_roc']
    if 2 in tasks: run_cmd(cmd)

    # export embed
    # The path template must be filled in; left unformatted, the literal
    # '%s/sample.embed.fa' was handed to the command. The checkpoint
    # directory is used, like the other per-checkpoint outputs above.
    embed_output = '%s/sample.embed.fa' % ckpt_dir

    cmd = 'python evaluation.py export_embedding '+\
                     '--seq_type %s '%param_dic['seq_type'] +\
                     '--input_fa %s '%sample_fa +\
                     '--embed_output %s '%embed_output +\
                     '--model_prefix %s '%ckpt_path
    if 3 in tasks: run_cmd(cmd)

    logPrint(logLabel + 'End')

    return
Ejemplo n.º 16
0
def batch_test_data(args):
    """Generate simulated data files by running ``simulate_data.py`` steps.

    Only the first parameter combination of ``args.config_file`` is used.
    Output goes to ``<root_dir>/<data_label>/<args.purpose>/data/``. The
    steps to run are listed in ``args.tasks`` (comma-separated integers):

    * 0 -- gen_cluster_center   -> cluster.fa
    * 1 -- sample_from_cluster  -> sample.fa
    * 2 -- calc_dist            -> sample.dist (only when purpose is 'train')
    * 3 -- gen_seq2seq          -> seq2seq_cgk.txt
    * 4 -- gen_siamese_seq2seq  -> siamese_seq2seq.txt

    Steps 3 and 4 also generate a ``*_validation.txt`` file when the purpose
    is 'train' and ``param_dic['n_clusters_validation'] > 0``. Finally, when
    ``int(param_dic['apply_pre_train']) == 1``, the pre-training data is
    generated with ``siamese_seq2seq_data_pretrain``.

    Args:
        args: namespace providing ``config_file``, ``purpose``, ``tasks``
            and ``N`` (passed as ``--thread``).
    """
    logLabel = '[batch_test_data]'

    logPrint('%s Start' % (logLabel))

    # we only need location info from param_dic, param combinations not
    # important here
    param_dic = parse_config_file(args.config_file)[0][1]

    #---------- locations
    root_dir = param_dic['root_dir']
    data_label = param_dic['data_label']
    data_dir = '%s/%s/%s/data/' % (root_dir, data_label, args.purpose)

    run_cmd('mkdir -p %s' % data_dir)
    logPrint('%s created' % data_dir)

    cluster_fa = '%s/cluster.fa' % data_dir
    sample_fa = '%s/sample.fa' % data_dir
    sample_dist = '%s/sample.dist' % data_dir

    def _needs_validation():
        # Validation files are produced for training data only.
        return (args.purpose == 'train'
                and param_dic['n_clusters_validation'] > 0)

    def _gen_seq2seq_cmd(out_path, num_key):
        # One gen_seq2seq command; `num_key` names the entry used for --num.
        return ''.join([
            'python simulate_data.py gen_seq2seq ',
            '--output %s ' % out_path,
            '--seq_type %s ' % param_dic['seq_type'],
            '--seq2seq_type %s ' % param_dic['seq2seq_type'],
            '--num %s ' % param_dic[num_key],
            '--length %s ' % param_dic['cluster_len'],
        ])

    def _gen_siamese_cmd(out_path, num_key):
        # One gen_siamese_seq2seq command; `num_key` names the --num entry.
        return ''.join([
            'python simulate_data.py gen_siamese_seq2seq ',
            '--output %s ' % out_path,
            '--seq_type %s ' % param_dic['seq_type'],
            '--si_correlation_type %s ' % param_dic['si_correlation_type'],
            '--num %s ' % param_dic[num_key],
            '--length %s ' % param_dic['cluster_len'],
            '--length2 %s ' % param_dic['cluster_len2'],
            '--rate_ins %s ' % param_dic['rate_ins'],
            '--rate_del %s ' % param_dic['rate_del'],
            '--rate_sub %s ' % param_dic['rate_sub'],
        ])

    #---------- what kind of data to generate
    tasks = [int(d) for d in args.tasks.split(',') if d != '']

    #---------- actual commands
    if 0 in tasks:
        run_cmd(''.join([
            'python simulate_data.py gen_cluster_center ',
            '--output %s ' % cluster_fa,
            '--seq_type %s ' % param_dic['seq_type'],
            '--num %s ' % param_dic['n_clusters'],
            '--length %s ' % param_dic['cluster_len'],
            '--weight_distr %s ' % param_dic['weight_distr'],
            '--sid_pre %s ' % param_dic['sid_pre'],
        ]))

    if 1 in tasks:
        run_cmd(''.join([
            'python simulate_data.py sample_from_cluster ',
            '--fa_input %s ' % cluster_fa,
            '--type %s ' % param_dic['seq_type'],
            '--fa_output %s ' % sample_fa,
            '--prefix %s ' % param_dic['sample_prefix'],
            '--total_samples %s ' % param_dic['n_tot_samples'],
            '--copy  %s ' % param_dic['n_copy'],
            '--ins %s ' % param_dic['rate_ins'],
            '--dele %s ' % param_dic['rate_del'],
            '--sub %s ' % param_dic['rate_sub'],
            '--thread %d ' % args.N,
            '--clear_split_files %s ' % param_dic['clear_split'],
        ]))

    if 2 in tasks and args.purpose == 'train':
        run_cmd(''.join([
            'python simulate_data.py calc_dist ',
            '--distance_type_list %s ' % param_dic['dist_tp_list_data'],
            '--seq_type %s ' % param_dic['seq_type'],
            '--seq_fa %s ' % sample_fa,
            '--dist_out %s ' % sample_dist,
            '--thread %d ' % args.N,
            '--addheader %s ' % param_dic['add_hd'],
            '--clear_intermediate %s ' % param_dic['clear_interm'],
            '--max_num_dist_1thread %s ' % param_dic['max_num_dist_1thread'],
        ]))

    if 3 in tasks:
        # seq2seq_cgk of seq2seq architecture
        seq2seq_pair = '%s/seq2seq_cgk.txt' % data_dir
        run_cmd(_gen_seq2seq_cmd(seq2seq_pair, 'n_clusters'))

        if _needs_validation():
            # generate validation data
            seq2seq_pair_validation = (
                '%s/seq2seq_cgk_validation.txt' % data_dir)
            run_cmd(
                _gen_seq2seq_cmd(seq2seq_pair_validation,
                                 'n_clusters_validation'))

    if 4 in tasks:
        # siamese_seq2seq of siamese seq2seq architecture
        siamese_seq2seq = '%s/siamese_seq2seq.txt' % data_dir
        run_cmd(_gen_siamese_cmd(siamese_seq2seq, 'n_clusters'))

        if _needs_validation():
            # generate validation data
            siamese_seq2seq_validation = (
                '%s/siamese_seq2seq_validation.txt' % data_dir)
            run_cmd(
                _gen_siamese_cmd(siamese_seq2seq_validation,
                                 'n_clusters_validation'))

    # 'End' is logged before the optional pre-training data step below.
    logPrint('%s End' % (logLabel))

    if int(param_dic['apply_pre_train']) == 1:
        siamese_seq2seq_data_pretrain(param_dic, args, data_dir)

    return
Ejemplo n.º 17
0
 def handle_stranger_disconnected(self):
     """Mark this client as not connected, log it, and disconnect both sides."""
     self.connected = False
     notice = '[%s] Stranger has disconnected.' % self.name
     util.logPrint(notice)
     self.disconnect()
     # the paired client is disconnected as well
     peer = self.other_client
     peer.disconnect()
Ejemplo n.º 18
0
 def handle_got_message(self, message):
     """Log a message received from the stranger.

     Any exception raised while formatting or logging the message is itself
     logged, together with its traceback, instead of propagating.
     """
     try:
         util.logPrint('[%s] Stranger: %s' % (self.name, message))
     except Exception as e:  # `as` form is valid on Python 2.6+ and Python 3
         util.logPrint(str(e))
         traceback.print_exc()
Ejemplo n.º 19
0
 def handle_common_likes(self, likes):
     """Log the shared interests, joined with ', '."""
     fields = (self.name, ', '.join(likes))
     util.logPrint('[%s] You both like %s.' % fields)
Ejemplo n.º 20
0
 def handle_connected(self):
     """Switch to the 'CONNECTED' state, set the connected flag and log it."""
     self.state = 'CONNECTED'
     self.connected = True
     greeting = "You're now chatting with a random stranger. Say hi!"
     util.logPrint(greeting)
Ejemplo n.º 21
0
 def handle_waiting(self):
     """Switch to the 'WAITING' state and log that a partner is being sought."""
     self.state = 'WAITING'
     notice = "Looking for someone you can chat with..."
     util.logPrint(notice)