Code example #1
    def __init__(self, args):
        super(BiLSTM, self).__init__()
        self.args = args
        self.hidden_dim = args.hidden_dim
        self.batch_size = args.batch_size
        self.dropout = nn.Dropout(args.dropout)
        self.dropout_embed = nn.Dropout(args.dropout_embed)

        self.word_embedding = Embedding.Embedding(args.embed_num,
                                                  args.embedding_dim)
        self.use_cuda = args.use_cuda
        self.use_pretrained_emb = args.use_pretrained_emb

        if self.use_pretrained_emb:
            pretrained_weight = loader.vector_loader(args.word_list)
            pretrained_weight = torch.FloatTensor(pretrained_weight)
            # print(pretrained_weight)
            self.word_embedding_static = Embedding.ConstEmbedding(
                pretrained_weight)

        # embedding_dim * 2: the trainable and the static (pretrained)
        # embeddings are presumably concatenated in the forward pass.
        self.lstm = nn.LSTM(args.embedding_dim * 2,
                            args.hidden_dim,
                            bidirectional=True,
                            dropout=args.dropout_model)

        self.hidden2label1 = nn.Linear(args.hidden_dim * 2, args.hidden_dim)
        self.hidden2label2 = nn.Linear(args.hidden_dim, args.class_num)
        self.hidden = self.init_hidden(args.batch_size)
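
The constructor ends by calling self.init_hidden, which the snippet does not show. A minimal sketch of what such a helper typically returns for a single-layer bidirectional LSTM (an assumption, not this repository's actual code):

    # Hypothetical helper; assumes torch is imported at module level.
    def init_hidden(self, batch_size):
        # Shape (num_layers * num_directions, batch, hidden_dim); a BiLSTM
        # has two directions. nn.LSTM expects the (h_0, c_0) pair.
        h0 = torch.zeros(2, batch_size, self.hidden_dim)
        c0 = torch.zeros(2, batch_size, self.hidden_dim)
        return (h0, c0)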
Code example #2
def Get_vector_of_difference_between_clusters(data_path, result_path, step,
                                              n_features, Embedding):
    #data_path=FLAGS.data_path
    #result_path=FLAGS.result_dir
    #step=FLAGS.step
    #n_features = FLAGS.n_features_embedding

    data1 = GetData(data_path + str(2 * step) + "/")
    print(data_path + str(2 * step) + "/")
    cluster1_mean = np.zeros(n_features)
    for d in data1:
        cluster1_mean += Embedding(d)
    cluster1_mean /= len(data1)

    data2 = GetData(data_path + str(2 * step + 1) + "/")
    cluster2_mean = np.zeros(n_features)
    for d in data2:
        cluster2_mean += Embedding(d)
    cluster2_mean /= len(data2)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    np.savetxt(result_path + str(2 * step) + "_" + str(2 * step + 1) + ".txt",
               cluster2_mean - cluster1_mean)
    np.savetxt(result_path + str(2 * step) + ".txt", cluster1_mean)
    np.savetxt(result_path + str(2 * step + 1) + ".txt", cluster2_mean)
Code example #3
def datasets_features(name_dataset_train=None,
                      name_dataset_dev=None,
                      creation_features=True):
    ###############################################################################
    # This function creates or loads all the main features to train the model
    # (train data, dev_data, vocabulary)
    #
    # Input:
    #   name_dataset_train: name of the train dataset
    #   name_dataset_dev: name of the dev dataset
    #   creation_features: if True, build and save the features; if False,
    #                      load them from disk
    #
    # Output (returned only when creation_features is False):
    #   train_data: Training data to train the model
    #   dev_data: Dev data to test the model
    ###############################################################################

    try:
        path_train = dictionary_paths[name_dataset_train]
    except KeyError:
        path_train = None

    try:
        path_dev = dictionary_paths[name_dataset_dev]
    except KeyError:
        path_dev = None

    if creation_features:

        # Creation features
        prep.save_dataset(dev_test=False, dataset=path_train, limit_length=2)
        prep.save_dataset(dev_test=True, dataset=path_dev, limit_length=0)
        Vocabulary.create_vocabulary_unigram(VOCAB_UNIGRAM_SIZE)
        Vocabulary.create_vocabulary_bigrams(VOCAB_BIGRAM_SIZE)
        Embedding.embedding_matrix('vocab_unigram.npy')
        Embedding.embedding_matrix('vocab_bigram.npy', False)
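        # This branch only writes the feature files to disk; nothing is
        # returned. Call again with creation_features=False to get the data.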

    else:

        # load Train set
        train_x = Vocabulary.open_file('Train_data.utf8', True)
        train_y = Vocabulary.open_file('Train_label.txt', False)

        # load Dev set
        dev_x = Vocabulary.open_file('Dev_data.utf8', True)
        dev_y = Vocabulary.open_file('Dev_label.txt', False)

        # Creation of the training and dev dataset
        train_data = prepare_dataset(train_x, train_y, MAX_LENGTH)
        dev_data = prepare_dataset(dev_x, dev_y, MAX_LENGTH)

        return train_data, dev_data
Code example #4
    def __init__(self, args, pretrained):
        super(Transfrmr_bidaf, self).__init__()
        self.embed = embed.Embedding(args, pretrained)

        # Encoder module
        self.encoder_ctxt = encode.Encoder_block(args, 2 * args.word_dim)
        self.encoder_ques = encode.Encoder_block(args, 2 * args.word_dim)

        #Attention Flow Layer
        self.att_weight_c = Linear(args.hidden_size * 2, 1, args.dropout)
        self.att_weight_q = Linear(args.hidden_size * 2, 1, args.dropout)
        self.att_weight_cq = Linear(args.hidden_size * 2, 1, args.dropout)
        self.N = args.Model_encoder_size
        self.dropout = nn.Dropout(p=args.dropout)

        #Model Encoding Layer
        self.Model_encoder = self.get_clones(
            encode.Encoder_block(args, 8 * args.word_dim),
            args.Model_encoder_size)
        # self.Model2start= Linear(16 * args.word_dim, 8 * args.word_dim,args.dropout)
        # self.Model2end = Linear(16 * args.word_dim, 8 * args.word_dim,args.dropout)
        # self.start_idx = Linear(16 * args.word_dim,1,args.dropout)
        # self.end_idx = Linear(16 * args.word_dim, 1, args.dropout)
        self.start_idx = nn.Linear(16 * args.word_dim, 1)
        self.end_idx = nn.Linear(16 * args.word_dim, 1)
Code example #5
File: PCA.py Project: Seplanna/visualQuestionnaire
def AutoEncoderLatent(data_path, feature_dimension_embedding, Embedding):
    data = GetData(data_path)
    random.shuffle(data)
    latent = np.zeros([len(data), feature_dimension_embedding])
    for d in range(len(data)):
        latent[d] = Embedding(data[d])
    return data, latent
Code example #6
File: biLSTM.py Project: hhy5277/Stance-Detection
    def __init__(self, args):
        super(Model, self).__init__()
        self.embed_size = args.embed_size
        self.label_size = args.label_size
        self.topic_word_num = args.topic_word_num
        self.biLSTM_hidden_size = args.biLSTM_hidden_size
        self.biLSTM_hidden_num = args.biLSTM_hidden_num
        self.save_pred_emd_path = args.save_pred_emd_path
        self.word_num = args.word_num
        self.dropout = args.dropout
        self.word_alpha = args.word_alpha
        self.topic_alpha = args.topic_alpha

        self.embeddingTopic = nn.Embedding(self.topic_word_num,
                                           self.embed_size)
        self.embeddingText = nn.Embedding(self.word_num, self.embed_size)
        if args.using_pred_emb:
            load_emb_text = Embedding.load_predtrained_emb_zero(
                self.save_pred_emd_path,
                self.word_alpha.string2id,
                padding=True)
            load_emb_topic = Embedding.load_predtrained_emb_zero(
                self.save_pred_emd_path,
                self.topic_alpha.string2id,
                padding=False)
            # self.embeddingTopic = ConstEmbedding(load_emb_topic)
            # self.embeddingText = ConstEmbedding(load_emb_text)
            # self.embeddingTopic = nn.Embedding(args.topicWordNum, self.EmbedSize, sparse=True)
            # self.embeddingText = nn.Embedding(args.wordNum, self.EmbedSize, sparse=True)
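            # The fresh nn.Embedding tables below replace the ones created
            # above; the pretrained weights are copied into them afterwards.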
            self.embeddingTopic = nn.Embedding(self.topic_word_num,
                                               self.embed_size)
            self.embeddingText = nn.Embedding(self.word_num, self.embed_size)
            self.embeddingTopic.weight.data.copy_(load_emb_topic)
            self.embeddingText.weight.data.copy_(load_emb_text)
        self.biLSTM = nn.LSTM(self.embed_size,
                              self.biLSTM_hidden_size,
                              dropout=self.dropout,
                              num_layers=self.biLSTM_hidden_num,
                              batch_first=True,
                              bidirectional=True)
        self.linear1 = nn.Linear(self.biLSTM_hidden_size * 4,
                                 self.biLSTM_hidden_size // 2)
        self.linear2 = nn.Linear(self.biLSTM_hidden_size // 2, self.label_size)
Code example #7
def GetTheClosestPoint(point, data, Embedding):
    dist = 1e+10
    res = ''
    for d in data:
        #basename = os.path.basename(d).split("_")[-embedding_size-1:-1]
        #local_dist = np.linalg.norm(point- np.array([float(f) for f in basename]))
        local_dist = np.linalg.norm(point - Embedding(d))
        if local_dist < dist:
            dist = local_dist
            res = d
    return res
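
For larger collections the same nearest-neighbor search can be vectorized; a sketch under the assumption that all embeddings fit in memory (the function name is hypothetical):

import numpy as np

def GetTheClosestPointVectorized(point, data, Embedding):
    # Stack all embeddings into an (n, dim) matrix and take a single argmin
    # over the row-wise L2 distances.
    vectors = np.stack([Embedding(d) for d in data])
    distances = np.linalg.norm(vectors - point, axis=1)
    return data[int(np.argmin(distances))]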
Code example #8
    def __init__(self, data_path, n_bins, n_pictures_per_bin, n_features, Embedding):
        # load dataset ------------------------------------------
        self.data = glob(os.path.join(data_path, "*.jpg"))
        self.n_bins = n_bins
        self.n_features = n_features
        self.features = []
        for d in self.data:
            self.features.append(Embedding(d))
        self.features = np.array(self.features)
        self.n_pictures_per_bin = n_pictures_per_bin
        self.feature_n = 0
        # --------------------------

        self.step = 0
        self.n_pictures = self.n_bins * self.n_pictures_per_bin
        self.show_images()
Code example #9
    def __init__(self, args):
        super(BiLSTM, self).__init__()
        self.args = args
        self.hidden_dim = args.hidden_dim
        self.batch_size = args.batch_size
        self.dropout = nn.Dropout(args.dropout)
        self.dropout_embed = nn.Dropout(args.dropout_embed)

        self.word_embedding = Embedding.Embedding(args.embed_num,
                                                  args.embedding_dim,
                                                  padding_idx=args.padID)

        # self.word_embedding.reset_parameters()
        self.lstm = nn.LSTM(args.embedding_dim,
                            args.hidden_dim,
                            bidirectional=True,
                            dropout=args.dropout_model)

        self.hidden2label1 = nn.Linear(args.hidden_dim * 2, args.hidden_dim)
        self.hidden2label2 = nn.Linear(args.hidden_dim, args.class_num)
        self.hidden = self.init_hidden(args.batch_size, args.use_cuda)
Code example #10
def ClusterData(data, embedding_size, Embedding, n_clusters, result_dir, step):
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    latent = np.zeros([len(data), embedding_size])
    for d in range(len(data)):
        #basename = os.path.basename(data[d]).split("_")[-embedding_size-1:-1]
        #latent[d] = np.array([float(f) for f in basename])
        latent[d] = Embedding(data[d])
    clusters = KMeans(n_clusters=n_clusters, random_state=200).fit(latent)

    n_ex = 128
    images_indexes = []
    #os.makedirs(result_dir + "/Anchor_im/")
    for c in range(n_clusters):
        cluster_images_index = [
            i for i in range(len(data)) if clusters.labels_[i] == c
        ]
        images_indexes.append(cluster_images_index)
        print('cluster = ', c, len(cluster_images_index))
        # clusters.transform gives each sample's distance to every centroid;
        # column c is the distance to this cluster's own center.
        distances_to_cluster = clusters.transform(latent[cluster_images_index])
        distances_to_cluster = distances_to_cluster.T[c]
        # Take the n_ex samples closest to the centroid (the original code
        # sorted only the first n_ex distances, an arbitrary subset).
        arg_sort = np.argsort(distances_to_cluster)[:n_ex]
        images_index = np.array(cluster_images_index)
        im = images_index[arg_sort]
        batch_files = [data[i] for i in im]
        batch = [
            get_image(batch_file,
                      input_height=100,
                      input_width=150,
                      resize_height=100,
                      resize_width=150,
                      crop=False,
                      grayscale=False) for batch_file in batch_files
        ]
        # NOTE: the 12x12 grid expects 144 images, but n_ex caps the batch at 128.
        imsave(np.array(batch[:144]), [12, 12],
               result_dir + '/test_' + str(step) + '_' + str(c) + "_.png")
        #subprocess.call(['cp', batch_files[0], result_dir + "/Anchor_im/" + os.path.basename(batch_files[0])])
    return images_indexes, clusters
Code example #11
def embed(y, n_dt, code, P, n_code=32):
    """
    Embed a watermark of n_code hex digits (4 bits each) into the audio
    signal y: transform y with tf.stt_my, watermark the selected DCT segment
    of each frame using the PN matrix P, then inverse-transform. Returns the
    watermarked signal and the watermark as a hex string.
    """
    y = y.flatten()
    Dc = tf.stt_my(y, n_dt=n_dt)
    if code == 'random':
        wmbits = np.random.choice(16, size=n_code)
    else:
        wmbits = pre.hex_to_code(code)
    ns = pre.code_to_hex(wmbits)

    signal_wmd = np.zeros_like(Dc)
    wt = pre.time_weight(Dc, N=n_code, L_n=P.shape[1])
    for i in range(Dc.shape[1]):
        X_i = Dc[:, i]
        X_l, X_selected, X_h = pre.dct_segment_generate(X_i,
                                                        N=n_code,
                                                        L_n=P.shape[1])
        Y = eb.watermark_embed(X_selected, P, wmbits, N=n_code, weight=wt[i])
        Y_i = pre.dct_reconstruct(X_l, Y, X_h)
        signal_wmd[:, i] = Y_i
    embedded = tf.istt_my(signal_wmd, length=y.shape[0])

    return embedded, ns
Code example #12
def CrossRecurrencePlot(x, y, m, t, e, distance, standardization=False, plot=False):
    """
    It computes and plots the (cross)recurrence plot of the uni/multivariate input signal(s) x and y (in pandas DataFrame format).

    **Reference :**

    * N. Marwan, M. Carmen Romano, M. Thiel and J. Kurths. "Recurrence plots for the analysis of complex systems". Physics Reports 438(5), 2007.

    :param x:
        first input signal
    :type x: pd.DataFrame

    :param y:
        second input signal
    :type y: pd.DataFrame

    :param m:
        embedding dimension
    :type m: int

    :param t:
       embedding delay
    :type t: int

    :param e:
        threshold for recurrence (eps)
    :type e: float

    :param distance:
        It specifies which distance method is used. It can assume the following values:\n
        1. 'euclidean';
        2. 'maximum';
        3. 'manhattan';
        4. 'rr' (fixed recurrence rate, maximum norm)

    :type distance: str

    :param standardization:
       if True, data are normalized to zero mean and unit variance. Default: False
    :type standardization: bool

    :param plot:
       if True, the recurrence plot is displayed. Default: False
    :type plot: bool

    """
    ' Raise error if parameters are not in the correct type '
    if not(isinstance(x, pd.DataFrame)) : raise TypeError("Requires x to be a pd.DataFrame")
    if not(isinstance(y, pd.DataFrame)) : raise TypeError("Requires y to be a pd.DataFrame")

    if not(isinstance(m, int)) : raise TypeError("Requires m to be an integer")
    if not(isinstance(t, int)) : raise TypeError("Requires t to be an integer")
    if not(isinstance(e, float)) : raise TypeError("Requires eps to be a float")
    if not(isinstance(distance, str)) : raise TypeError("Requires distance to be a string")
    if not(isinstance(standardization, bool)) : raise TypeError("Requires standardization to be a bool")
    if not(isinstance(plot, bool)) : raise TypeError("Requires plot to be a bool")


    ' Raise error if parameters do not respect input rules '

    if m <= 0 : raise ValueError("Requires m to be positive and greater than 0")
    if t <= 0 : raise ValueError("Requires t to be positive and greater than 0")
    if e < 0 : raise ValueError("Requires eps to be positive")
    if distance not in ('euclidean', 'maximum', 'manhattan', 'rr'): raise ValueError("Requires a valid way to compute distance")
    
    if standardization:
        x = Standardize.Standardize(x)
        y = Standardize.Standardize(y)

    if (m != 1) or (t != 1):
        x = Embedding.Embedding(x, m, t)
        y = Embedding.Embedding(y, m, t)

    vd = 2
    if distance == 'euclidean':
        pass
    elif distance == 'manhattan':
        vd = 1
    elif distance == 'maximum' or distance == 'rr':
        vd = np.inf

    crp_tmp = np.ones((x.shape[0], y.shape[0]))

    if distance != 'rr':
        for i in range(0, x.shape[0]):
            x_row_rep_T = pd.concat([x.iloc[i, :]] * y.shape[0], axis=1,
                                    ignore_index=True)
            x_row_rep = x_row_rep_T.transpose()

            dist = Distance.Minkowski(x_row_rep, y, vd)

            # 0 marks a recurrence (dist <= e), 1 a non-recurrence
            diff_threshold_norm = e - dist
            diff_threshold_norm[diff_threshold_norm >= 0] = 0
            diff_threshold_norm[diff_threshold_norm < 0] = 1

            crp_tmp[x.shape[0] - 1 - i, :] = diff_threshold_norm.T

        crp = np.fliplr(crp_tmp.T)

    else:
        dist_m = np.zeros((x.shape[0], y.shape[0]))
        for i in range(0, x.shape[0]):
            x_row_rep_T = pd.concat([x.iloc[i, :]] * y.shape[0], axis=1,
                                    ignore_index=True)
            x_row_rep = x_row_rep_T.transpose()

            dist = Distance.Minkowski(x_row_rep, y, vd)

            dist_m[i, :] = dist.T

        dist_m_f = dist_m.flatten()
        dist_m_f.sort()

        # e is reinterpreted as a recurrence-rate quantile of all pairwise
        # distances; the index must be an int (np.ceil returns a float).
        e = dist_m_f[int(np.ceil(e * len(dist_m_f)))]

        for i in range(0, x.shape[0]):
            diff_threshold_norm = e - dist_m[i, :].T
            diff_threshold_norm[diff_threshold_norm >= 0] = 0
            diff_threshold_norm[diff_threshold_norm < 0] = 1

            crp_tmp[x.shape[0] - 1 - i, :] = diff_threshold_norm.T

        crp = np.fliplr(crp_tmp.T)

    result = dict()
    result['crp']= crp
    
    if plot:
        plt.ion()
        figure = plt.figure()
        ax = figure.add_subplot(111)

        ax.set_xlabel('Time (in samples)')
        ax.set_ylabel('Time (in samples)')
        ax.set_title('Cross recurrence matrix')

        ax.imshow(result['crp'],
                  plt.cm.binary_r,
                  origin='lower',
                  interpolation='nearest',
                  vmin=0,
                  vmax=1)

    return result
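
A hypothetical usage sketch; the series, embedding parameters, and threshold below are placeholders, and the project's Embedding and Distance modules must be importable:

import numpy as np
import pandas as pd

x = pd.DataFrame(np.sin(np.linspace(0, 8 * np.pi, 200)))
y = pd.DataFrame(np.cos(np.linspace(0, 8 * np.pi, 200)))
result = CrossRecurrencePlot(x, y, m=2, t=1, e=0.3, distance='euclidean')
crp = result['crp']  # 0 marks recurrences (dist <= e) in this implementation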
Code example #13
File: PCA.py Project: Seplanna/visualQuestionnaire
if FLAGS.PCA_GIST:
    data_path = FLAGS.data_path
    n_features_embedding = FLAGS.n_features_embedding
    n_features_new = FLAGS.new_features_embedding
    result_dir = FLAGS.result_dir
    data, latent = GIST_latent(data_path, n_features_embedding)
    PCA_(data, latent, n_features_new, result_dir)

if FLAGS.PCA_AutoEncoder:
    data_path = FLAGS.data_path
    n_features_embedding = FLAGS.n_features_embedding
    n_features_new = FLAGS.new_features_embedding
    result_dir = FLAGS.result_dir

    data, latent = AutoEncoderLatent(data_path, n_features_embedding,
                                     Embedding(FLAGS.embedding))
    PCA_(data, latent, n_features_new, result_dir)

if FLAGS.My:
    data_path = FLAGS.data_path
    n_features_embedding = FLAGS.n_features_embedding
    result_dir = FLAGS.result_dir
    svm_path = FLAGS.svm
    data = My_latent(data_path, svm_path, n_features_embedding, result_dir)

if FLAGS.AutoEncoderByDirection:
    data_path = FLAGS.data_path
    n_features_embedding = FLAGS.n_features_embedding
    result_dir = FLAGS.result_dir
    direction_path = FLAGS.direction_path
    data, latent = AutoEncoderLatent(data_path, 10, Embedding(FLAGS.embedding))
Code example #14
File: Hybrid_Limaye.py Project: YEslahi/WebTable
def RunHybrid(TableList):
    print("I am in Hybrid of Limaye")

    #reading the Limaye table
    os.chdir('./data/Limaye/tables_instance/')

    #contains prediction results
    mapped_Prediction_Dict = dict()
    RLU_precision = 0
    RLU_recall = 0
    RLU_F1 = 0
    RLU_TP_Total = 0
    RLU_FN_Total = 0
    RLU_FP_Total = 0
    #---------------
    Emb_precision = 0
    Emb_recall = 0
    Emb_F1 = 0
    Emb_TP_Total = 0
    Emb_FN_Total = 0
    Emb_FP_Total = 0
    #---------------
    HybridI_precision = 0
    HybridI_recall = 0
    HybridI_F1 = 0
    HybridI_TP_Total = 0
    HybridI_FN_Total = 0
    HybridI_FP_Total = 0
    #---------------
    HybridII_precision = 0
    HybridII_recall = 0
    HybridII_F1 = 0
    HybridII_TP_Total = 0
    HybridII_FN_Total = 0
    HybridII_FP_Total = 0

    # change the path so glob returns csv filenames without directory components
    os.chdir(
        '/home/yasamin/Codes/WebTableAnnotation/data/Limaye/tables_instance/')

    #reading all CSV files in path
    allCsvTableFiles = glob.glob('*.csv')

    #print(allCsvTableFiles)
    start_time = time.time()
    T = pd.DataFrame()
    i = 0

    for table_csv in allCsvTableFiles:

        if (table_csv in TableList):
            os.chdir(
                '/home/yasamin/Codes/WebTableAnnotation/data/Limaye/tables_instance/'
            )
            with open(table_csv, 'r', encoding='utf-8') as csvTableFile:
                print("This is the Table file name :\n\n", table_csv)

                T = pd.read_csv(table_csv, header=None)
                #switch path to normal form
                os.chdir('/home/yasamin/Codes/WebTableAnnotation/')

                #Remove the row if there is no GT in entity file for it.
                entity_csv = table_csv
                os.chdir(
                    '/home/yasamin/Codes/WebTableAnnotation/data/Limaye/entities_instance/'
                )
                with open(entity_csv, 'r', encoding='utf-8') as csvEntityFile:
                    print("This is the Entity file name :\n\n", entity_csv)
                    E = pd.read_csv(entity_csv, header=None)
                    count_row = T.shape[0]
                    for i in range(0, count_row):
                        if not (i in E[E.columns[-1]].values):
                            print("This i does not exists:", i)
                            T = T.drop([i])

                #switch path to normal form
                os.chdir('/home/yasamin/Codes/WebTableAnnotation/')
                #----------------------------------------------------------------
                #uncomment for Refined Lookup
                #----------------------------------------------------------------
                # RLU_TPrimeIsAnnotated , RLU_TPrimeAnnotation = fp.getFullPhase(T,table_csv)
                # print("Refined Lookup result:")
                # print("-"*30)
                # print(RLU_TPrimeAnnotation)
                # print(RLU_TPrimeIsAnnotated)
                # #metrics:
                # RLU_TP, RLU_FN, RLU_FP =  MCal.MetricsCalcul(T,table_csv,RLU_TPrimeAnnotation,RLU_TPrimeIsAnnotated)
                # RLU_TP_Total = RLU_TP_Total+RLU_TP
                # RLU_FN_Total = RLU_FN_Total+RLU_FN
                # RLU_FP_Total = RLU_FP_Total+RLU_FP
                # RLU_precision, RLU_recall, RLU_F1 = PRF.Precision_Recall_F1(RLU_TP_Total,RLU_FN_Total,RLU_FP_Total)
                #----------------------------------------------------------------
                #uncomment for Embedding
                #----------------------------------------------------------------
                Emb_TPrimeIsAnnotated, Emb_TPrimeAnnotation = emb.getEmbedding(
                    T)
                print("Embedding result:")
                print("-" * 30)
                print(Emb_TPrimeAnnotation)
                print(Emb_TPrimeIsAnnotated)
                #metrics:
                Emb_TP, Emb_FN, Emb_FP = MCal.MetricsCalcul(
                    T, table_csv, Emb_TPrimeAnnotation, Emb_TPrimeIsAnnotated)
                Emb_TP_Total = Emb_TP_Total + Emb_TP
                Emb_FN_Total = Emb_FN_Total + Emb_FN
                Emb_FP_Total = Emb_FP_Total + Emb_FP
                Emb_precision, Emb_recall, Emb_F1 = PRF.Precision_Recall_F1(
                    Emb_TP_Total, Emb_FN_Total, Emb_FP_Total)

            #----------------------------------------------------------------
            #uncomment for Hybrid I(uncomment also embedding and RLU)
            #----------------------------------------------------------------
            # #Hybrid1

            # 	HybridI_TPrimeIsAnnotated = RLU_TPrimeIsAnnotated
            # 	HybridI_TPrimeAnnotation = RLU_TPrimeAnnotation

            # 	for k in HybridI_TPrimeIsAnnotated:
            # 		if(HybridI_TPrimeIsAnnotated[k] == True):
            # 			continue
            # 		elif(HybridI_TPrimeIsAnnotated[k] == False):
            # 			HybridI_TPrimeIsAnnotated[k] = Emb_TPrimeIsAnnotated[k]
            # 			HybridI_TPrimeAnnotation[k] = Emb_TPrimeAnnotation[k]

            # 	print("Hybrid I result:")
            # 	print("-"*30)
            # 	print(HybridI_TPrimeAnnotation)
            # 	print(HybridI_TPrimeIsAnnotated)
            # 	#metrics:
            # 	HybridI_TP, HybridI_FN, HybridI_FP =  MCal.MetricsCalcul(T,table_csv,HybridI_TPrimeAnnotation,HybridI_TPrimeIsAnnotated)
            # 	HybridI_TP_Total = HybridI_TP_Total+HybridI_TP
            # 	HybridI_FN_Total = HybridI_FN_Total+HybridI_FN
            # 	HybridI_FP_Total = HybridI_FP_Total+HybridI_FP
            # 	HybridI_precision, HybridI_recall, HybridI_F1 = PRF.Precision_Recall_F1(HybridI_TP_Total,HybridI_FN_Total,HybridI_FP_Total)

            #----------------------------------------------------------------
            #uncomment for Hybrid II(uncomment also embedding and RLU)
            #----------------------------------------------------------------
            # #Hybrid2
            # 	HybridII_TPrimeIsAnnotated = Emb_TPrimeIsAnnotated
            # 	HybridII_TPrimeAnnotation = Emb_TPrimeAnnotation

            # 	for k in HybridII_TPrimeIsAnnotated:
            # 		if(HybridII_TPrimeIsAnnotated[k] == True):
            # 			continue
            # 		elif(HybridII_TPrimeIsAnnotated[k] == False):
            # 			HybridII_TPrimeIsAnnotated[k] = RLU_TPrimeIsAnnotated[k]
            # 			HybridII_TPrimeAnnotation[k] = RLU_TPrimeAnnotation[k]

            # 	print("Hybrid II result:")
            # 	print("-"*30)
            # 	print(HybridII_TPrimeAnnotation)
            # 	print(HybridII_TPrimeIsAnnotated)

            # #metrics:
            # HybridII_TP, HybridII_FN, HybridII_FP =  MCal.MetricsCalcul(T,table_csv,HybridII_TPrimeAnnotation,HybridII_TPrimeIsAnnotated)
            # HybridII_TP_Total = HybridII_TP_Total+HybridII_TP
            # HybridII_FN_Total = HybridII_FN_Total+HybridII_FN
            # HybridII_FP_Total = HybridII_FP_Total+HybridII_FP
            # HybridII_precision, HybridII_recall, HybridII_F1 = PRF.Precision_Recall_F1(HybridII_TP_Total,HybridII_FN_Total,HybridII_FP_Total)

    #------------------------------------------------------------------------------------------
    # print("Final result of the core partition")
    # print("-"*30)
    # #----------------------------------------------------------------
    # print("Final Refined Lookup:")
    # print("Final RLU Precision : ", RLU_precision )
    # print("Final RLU Recall : ",RLU_recall)
    # print("Final RLU F1 : ",RLU_F1)
    # print("-"*30)
    # print("Final RLU FN ",RLU_FN_Total)
    # print("Final RLU FP ",RLU_FP_Total)
    # print("Final RLU TP ",RLU_TP_Total)
    # print("-"*30)
    #----------------------------------------------------------------
    print("Embedding:")
    print("Final Embedding Precision : ", Emb_precision)
    print("Final Embedding Recall : ", Emb_recall)
    print("Final Embedding F1 : ", Emb_F1)
    print("-" * 30)
    print("Final Embedding FN ", Emb_FN_Total)
    print("Final Embedding FP ", Emb_FP_Total)
    print("Final Embedding TP ", Emb_TP_Total)
    print("-" * 30)
    #----------------------------------------------------------------
    # print("Hybrid I:")
    # print("Final Hybrid I Precision : ", HybridI_precision )
    # print("Final Hybrid I Recall : ",HybridI_recall)
    # print("Final Hybrid I F1 : ",HybridI_F1)
    # print("-"*30)
    # print("Final Hybrid I FN ",HybridI_FN_Total)
    # print("Final Hybrid I FP ",HybridI_FP_Total)
    # print("Final Hybrid I TP ",HybridI_TP_Total)
    # print("-"*30)
    #----------------------------------------------------------------
    # print("Hybrid II:")
    # print("Final Hybrid II Precision : ", HybridII_precision )
    # print("Final Hybrid II Recall : ",HybridII_recall)
    # print("Final Hybrid II F1 : ",HybridII_F1)
    # print("-"*30)
    # print("Final Hybrid II FN ",HybridII_FN_Total)
    # print("Final Hybrid II FP ",HybridII_FP_Total)
    # print("Final Hybrid II TP ",HybridII_TP_Total)
    # print("-"*30)
    # print("-----------------------------------------------------------------------------------")
    #print("Run time for 1 lookup phase and one embedding phase, Local Endpoint ",time.time() - start_time)
    # print("-----------------------------------------------------------------------------------")
    return
Code example #15
        if m <= 0 : raise ValueError("Requires m to be positive and greater than 0")
        if t <= 0 : raise ValueError("Requires t to be positive and greater than 0")
        if e < 0 : raise ValueError("Requires eps to be positive")
        if distance != 'euclidean' and distance != 'maximum' and distance != 'manhattan': raise ValueError("Requires a valid way to compute distance")
    # Python 3 form: the original used the Python 2 "except ValueError, err_msg"
    # syntax and had an unreachable "return" after the re-raise.
    except ValueError as err_msg:
        raise ValueError(err_msg)
    
    
    if standardization:
        x = Standardize.Standardize(x)
        y = Standardize.Standardize(y)

    if (m != 1) or (t != 1):
        x = Embedding.Embedding(x, m, t)
        y = Embedding.Embedding(y, m, t)

    vd = 2
    if distance == 'euclidean':
        pass
    elif distance == 'manhattan':
        vd = 1
    elif distance == 'maximum':
        vd = np.inf

    crp_tmp = np.ones((x.shape[0], y.shape[0]))

    for i in range(0, x.shape[0]):
        x_row_rep_T = pd.concat([x.iloc[i, :]] * y.shape[0], axis=1, ignore_index=True)
Code example #16
File: Vectorizer.py Project: RenzhiLi/S19Research
def queryVec(query):
    q = Embedding.queryEncoder(query)
    q = BiGRU.queryGRU(q)
    return q
Code example #17
File: data_io.py Project: Taolan/SynTree-WordVec
def AllCalculate(clause_weight, phrase_weight, word_weight, words, word_emb):
    prefix = "data/datapre/"

    farr1 = [
        "MSRpar2012-1.txt",
        #"MSRpar2012-2.txt",
        "MSRvid2012-1.txt",
        #"MSRvid2012-2.txt",
        "OnWN2012-1.txt",
        #"OnWN2012-2.txt",
        "OnWN2013-1.txt",
        #"OnWN2013-2.txt",
        "OnWN2014-1.txt",
        #"OnWN2014-2.txt",
        "SMTeuro2012-1.txt",
        #"SMTeuro2012-2.txt",
        "SMTnews2012-1.txt",  # 4
        #"SMTnews2012-2.txt",
        "FNWN2013-1.txt",
        #"FNWN2013-2.txt",
        "SMT2013-1.txt",
        #"SMT2013-2.txt",
        "headline2013-1.txt",  # 8
        #"headline2013-2.txt",
        "headline2014-1.txt",  # 8
        #"headline2014-2.txt",
        "headline2015-1.txt",  # 8
        #"headline2015-2.txt",
        "deft-forum2014-1.txt",
        # "deft-forum2014-2.txt",
        "deft-news2014-1.txt",
        # "deft-news2014-2.txt",
        "images2014-1.txt",
        #"images2014-02.txt",
        "images2015-1.txt",  # 19
        # "images2015-2.txt",
        "tweet-news2014-1.txt",  # 14
        # "tweet-news2014-2.txt",
        "answer-forum2015-1.txt",
        # "answer-forum2015-2.txt",
        "answer-student2015-1.txt",
        # "answer-student2015-2.txt",
        "belief2015-1.txt",
        #"belief2015-2.txt",
        "sicktest-1.txt",
        # "sicktest-2.txt",
        "twitter-1.txt"
    ]
    #"twitter-2.txt"]
    farr2 = [
        # "MSRpar2012-1.txt",
        "MSRpar2012-2.txt",
        # "MSRvid2012-1.txt",
        "MSRvid2012-2.txt",
        # "OnWN2012-1.txt",
        "OnWN2012-2.txt",
        # "OnWN2013-1.txt",
        "OnWN2013-2.txt",
        # "OnWN2014-1.txt",
        "OnWN2014-2.txt",
        #"SMTeuro2012-1.txt",
        "SMTeuro2012-2.txt",
        # "SMTnews2012-1.txt", # 4
        "SMTnews2012-2.txt",
        # "FNWN2013-1.txt",
        "FNWN2013-2.txt",
        # "SMT2013-1.txt",
        "SMT2013-2.txt",
        # "headline2013-1.txt", # 8
        "headline2013-2.txt",
        #"headline2014-1.txt", # 8
        "headline2014-2.txt",
        # "headline2015-1.txt", # 8
        "headline2015-2.txt",
        # "deft-forum2014-1.txt",
        "deft-forum2014-2.txt",
        # "deft-news2014-1.txt",
        "deft-news2014-2.txt",
        # "images2014-1.txt",
        "images2014-02.txt",
        # "images2015-1.txt",   # 19
        "images2015-2.txt",
        # "tweet-news2014-1.txt", # 14
        "tweet-news2014-2.txt",
        # "answer-forum2015-1.txt",
        "answer-forum2015-2.txt",
        # "answer-student2015-1.txt",
        "answer-student2015-2.txt",
        # "belief2015-1.txt",
        "belief2015-2.txt",
        # "sicktest-1.txt",
        "sicktest-2.txt",
        # "twitter-1.txt",
        "twitter-2.txt"
    ]
    #"JHUppdb",
    #"anno-dev",
    farr_score = [
        "MSRpar2012-score.txt",
        #"MSRpar2012-2.txt",
        #"MSRvid2012",
        "MSRvid2012-score.txt",
        #"MSRvid2012-2.txt",
        "OnWN2012-score.txt",
        #"OnWN2012-2.txt",
        "OnWN2013-score.txt",
        #"OnWN2013-2.txt",
        "OnWN2014-score.txt",
        #"OnWN2014-2.txt",
        "SMTeuro2012-score.txt",
        #"SMTeuro2012-2.txt",
        "SMTnews2012-score.txt",  # 4
        #"SMTnews2012-2.txt",
        "FNWN2013-score.txt",
        #"FNWN2013-2.txt",
        "SMT2013-score.txt",
        #"SMT2013-2.txt",
        "headline2013-score.txt",  # 8
        #"headline2013-2.txt",
        "headline2014-score.txt",  # 8
        #"headline2014-2.txt",
        "headline2015-score.txt",  # 8
        #"headline2015-2.txt",
        "deft-forum2014-score.txt",
        #"deft-forum2014-2.txt",
        "deft-news2014-score.txt",
        #"deft-news2014-2.txt",
        "images2014-score.txt",
        #"images2014-02.txt",
        "images2015-score.txt",  # 19
        #"images2015-2.txt",
        "tweet-news2014-score.txt",  # 14
        #"tweet-news2014-2.txt",
        "answer-forum2015-score.txt",
        #"answer-forum2015-2.txt",
        "answer-student2015-score.txt",
        #"answer-student2015-2.txt",
        #"answer-student2015",
        "belief2015-score.txt",
        #"belief2015-2.txt",
        "sicktest-score.txt",
        #"sicktest-2.txt",
        "twitter-score.txt"
    ]
    #"twitter-2.txt"]

    for file1, file2, score in zip(farr1, farr2, farr_score):
        sentence_file_1 = prefix + file1
        sentence_file_2 = prefix + file2

        # Score files live under the same prefix as the sentence files
        # (cf. the run.py snippet from the same project below).
        golds = datapre.getGolds(prefix + score)

        sentence_list_1 = datapre.DataPre(sentence_file_1)
        sentence_list_2 = datapre.DataPre(sentence_file_2)

        t0 = time.time()
        emb1 = Embedding.Embedding(sentence_list_1, clause_weight,
                                   phrase_weight, word_weight, words, word_emb)
        emb2 = Embedding.Embedding(sentence_list_2, clause_weight,
                                   phrase_weight, word_weight, words, word_emb)
        t1 = time.time()
        print("embedding compute time: %f seconds" % round((t1 - t0), 2))

        scores = sim_evaluate(emb1, emb2, golds)
Code example #18
path_emo_lex = r"Lexicons\NRC-Hashtag-Emotion-Lexicon-v0.2\NRC-Hashtag-Emotion-Lexicon-v0.2.txt"
path_sen_lex = r"Lexicons\NRC-Hashtag-Sentiment-Lexicon-v1.0\HS-unigrams.txt"

# Generating the lexicons
lexi = l.load_lexicon(l.datareader(path_emo_lex))
lexi += l.load_lexicon(l.datareader(path_sen_lex, Elex=False), Elex=False)
print("Complete")

##
# Insert paths/types of embeddings (BERT, GloVe, Word2vec skip-gram)
path_bert = 'book_corpus_wiki_en_cased'
path_glove = r"Thesis - Embeddings\GloVe\glove.6B.300d.w2vformat.txt"
path_word2vec = r"Thesis - Embeddings\Word2Vec\GoogleNews-vectors-negative300.bin"

# loading the embedding methods
glove = em.WordToVec("glove", path_glove)
word_two_vec = em.WordToVec("word2vec", path_word2vec)
bert = em.Bert("bert", path_bert)
embeds = [glove, word_two_vec, bert]

##
# Loading aggregation methods
n_avg = n_avg
all_lexi = lambda x, y: all_lexicon_avg(x, y, lexi[:-1])
esla_sentiment = lambda x, y: lexicon_avg(x, y, lexi[-1])
esla_fear = lambda x, y: lexicon_avg(x, y, lexi[1])
agg_methods = {
    "NormalAvg": n_avg,
    "HashAvg": hashtag,
    "All_lexi": all_lexi,
    "Emotion_specific": esla_fear,
Code example #19
max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(df['Statement'].values)
X = tokenizer.texts_to_sequences(df['Statement'].values)
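# pad_sequences defaults to pre-padding each sequence with zeros up to the
# length of the longest sequence.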
X = pad_sequences(X)


# In[ ]:


embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
model.add(Dropout(0.5))
model.add(LSTM(128, dropout=0.4, recurrent_dropout=0.4, return_sequences=True))
model.add(LSTM(64, dropout=0.5, recurrent_dropout=0.5, return_sequences=False))
# NOTE: with categorical_crossentropy on one-hot labels, a softmax output is
# the conventional choice; the original snippet used sigmoid here.
model.add(Dense(2, activation='softmax', kernel_initializer='glorot_normal'))
model.compile(loss='categorical_crossentropy', optimizer='Nadam', metrics=['accuracy'])
print(model.summary())


# In[ ]:


Y = pd.get_dummies(df['Label']).values
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.20, random_state = 42)
print(X_train.shape,Y_train.shape)
print(X_test.shape,Y_test.shape)
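
A hypothetical training call for the model above; epoch count and batch size are placeholders:

batch_size = 32
model.fit(X_train, Y_train,
          epochs=5,
          batch_size=batch_size,
          validation_data=(X_test, Y_test))
score, acc = model.evaluate(X_test, Y_test, batch_size=batch_size)
print("test accuracy: %.4f" % acc)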
Code example #20
    #step=FLAGS.step
    #n_features = FLAGS.n_features_embedding

    data1 = GetData(data_path + str(2 * step) + "/")
    print(data_path + str(2 * step) + "/")
    cluster1_mean = np.zeros(n_features)
    for d in data1:
        cluster1_mean += Embedding(d)
    cluster1_mean /= len(data1)

    data2 = GetData(data_path + str(2 * step + 1) + "/")
    cluster2_mean = np.zeros(n_features)
    for d in data2:
        cluster2_mean += Embedding(d)
    cluster2_mean /= len(data2)

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    np.savetxt(result_path + str(2 * step) + "_" + str(2 * step + 1) + ".txt",
               cluster2_mean - cluster1_mean)
    np.savetxt(result_path + str(2 * step) + ".txt", cluster1_mean)
    np.savetxt(result_path + str(2 * step + 1) + ".txt", cluster2_mean)


if FLAGS.recieve_vector_of_difference_between_clusters:
    n_features = FLAGS.n_features
    for f in range(n_features):
        Get_vector_of_difference_between_clusters(FLAGS.data_path,
                                                  FLAGS.result_path, f + 1, 10,
                                                  Embedding(FLAGS.embedding))
Code example #21
def JointRecurrencePlot(x,
                        y,
                        m,
                        t,
                        e,
                        distance,
                        standardization=False,
                        plot=False):
    """
    It computes and plots the joint recurrence plot of the uni/multivariate input signal(s) x and y (in pandas DataFrame format).

    **Reference :**

    * N. Marwan, M. Carmen Romano, M. Thiel and J. Kurths. "Recurrence plots for the analysis of complex systems". Physics Reports 438(5), 2007.

    :param x:
        first input signal
    :type x: pd.DataFrame

    :param y:
        second input signal
    :type y: pd.DataFrame

    :param m:
        embedding dimension
    :type m: int

    :param t:
       embedding delay
    :type t: int

    :param e:
        threshold for recurrence (eps)
    :type e: float

    :param distance:
        It specifies which distance method is used. It can assume the following values:\n
        1. 'euclidean';
        2. 'maximum';
        3. 'manhattan';
        4. 'rr' (fixed recurrence rate, maximum norm)

    :type distance: str

    :param standardization:
       if True, data are normalized to zero mean and unit variance. Default: False
    :type standardization: bool

    :param plot:
       if True, the recurrence plot is displayed. Default: False
    :type plot: bool

    """
    ' Raise error if parameters are not in the correct type '
    if not (isinstance(x, pd.DataFrame)):
        raise TypeError("Requires x to be a pd.DataFrame")
    if not (isinstance(y, pd.DataFrame)):
        raise TypeError("Requires y to be a pd.DataFrame")

    if not (isinstance(m, int)): raise TypeError("Requires m to be an integer")
    if not (isinstance(t, int)): raise TypeError("Requires t to be an integer")
    if not (isinstance(e, float)):
        raise TypeError("requires eps to be a float")
    if not (isinstance(distance, str)):
        raise TypeError("Requires distance to be a string")
    if not (isinstance(standardization, bool)):
        raise TypeError("Requires standardization to be a bool")
    if not (isinstance(plot, bool)):
        raise TypeError("Requires plot to be a bool")

    ' Raise error if parameters do not respect input rules '
    if m <= 0: raise ValueError("Requires m to be positive and greater than 0")
    if t <= 0:
        raise ValueError("Requires t to be positive and  greater from 0")
    if e < 0: raise ValueError("Requires eps to be positive")
    if distance != 'euclidean' and distance != 'maximum' and distance != 'manhattan' and distance != 'rr':
        raise ValueError("Requires a valid way to compute distance")

    'Error if x and y have not the same size'
    if x.shape[0] != y.shape[0]:
        raise ValueError("The two signals have different length")

    if standardization:
        x = Standardize.Standardize(x)
        y = Standardize.Standardize(y)

    if m != 1 or t != 1:
        x = Embedding.Embedding(x, m, t)
        y = Embedding.Embedding(y, m, t)

    vd = 2
    if distance == 'euclidean':
        pass
    elif distance == 'manhattan':
        vd = 1
    elif distance == 'maximum':
        vd = np.inf
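    # NOTE: the 'rr' option passes validation above but leaves vd = 2
    # (Euclidean) here, unlike CrossRecurrencePlot, which switches 'rr'
    # to the maximum norm.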

    size = x.shape[0]

    crp_x_tmp = np.ones((size, size))
    crp_y_tmp = crp_x_tmp.copy()

    curRange = range(0, size)

    x = x.reset_index(drop=True)
    y = y.reset_index(drop=True)

    if distance != 'rr':
        for i in curRange:
            crp_x = _row_rep(x, size, i, vd, e, crp_x_tmp)
            crp_y = _row_rep(y, size, i, vd, e, crp_y_tmp)
    else:
        crp_x = _rr_row_rep(x, size, vd, e, crp_x_tmp, curRange)
        crp_y = _rr_row_rep(y, size, vd, e, crp_y_tmp, curRange)

    jrp = (1 - crp_x) * (1 - crp_y)
    jrp = 1 - jrp

    result = dict()
    result['jrp'] = jrp

    if plot:
        plt.ion()
        figure = plt.figure()
        ax = figure.add_subplot(111)

        ax.set_xlabel('Time (in samples)')
        ax.set_ylabel('Time (in samples)')
        ax.set_title('Joint recurrence matrix')

        ax.imshow(result['jrp'],
                  plt.cm.binary_r,
                  origin='lower',
                  interpolation='nearest',
                  vmin=0,
                  vmax=1)

    return result
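
A hypothetical usage sketch, mirroring the cross-recurrence example; inputs and parameters are placeholders, and the project's Embedding and Distance modules must be importable:

import numpy as np
import pandas as pd

x = pd.DataFrame(np.sin(np.linspace(0, 8 * np.pi, 200)))
y = pd.DataFrame(np.sin(np.linspace(0, 8 * np.pi, 200) + 0.5))
result = JointRecurrencePlot(x, y, m=2, t=1, e=0.3, distance='euclidean')
jrp = result['jrp']  # 0 where both signals recur at the same time pair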
Code example #22
    def __init__(self):
        self.emb = Embedding()
Code example #23
File: run.py Project: Taolan/SynTree-WordVec
            #"twitter-2.txt"]
prints = ""
rmpc = 1
params = params.params()
params.rmpc = rmpc

parr = []
for file1, file2, scorefile in zip(farr1, farr2, farr_score):
    sentence_file_1 = prefix + file1
    sentence_file_2 = prefix + file2
    golds = datapre.getGolds(prefix + scorefile)

    sentence_list_1 = datapre.DataPre(sentence_file_1)
    sentence_list_2 = datapre.DataPre(sentence_file_2)

    emb1 = Embedding.Embedding(sentence_list_1, clause_weight, phrase_weight,
                               word_weight, words, word_emb, params)
    emb2 = Embedding.Embedding(sentence_list_2, clause_weight, phrase_weight,
                               word_weight, words, word_emb, params)

    printstr = data_io.sim_evaluate(emb1, emb2, golds)

    parr.append(printstr)
#############################################
#print(parr)
sum2012 = 0
sum2013 = 0
sum2014 = 0
sum2015 = 0
sick = 0
twitter = 0
n12 = 0
n13 = 0
Code example #24
        Y_i = pre.dct_reconstruct(X_l, Y, X_h)
        signal_wmd[:, i] = Y_i
    embedded = tf.istt_my(signal_wmd, length=y.shape[0])

    return embedded, ns


path = 'F:/audio_wm/audio/'
all_file = pre.filter_file(path, 'wav')
#all_file = ['./audio/batman-5min.wav']

n_dt = 8192
L_n = 32
n_code = 32  # number of hex digits: the bit count (128) divided by 4

p0 = eb.seed_generate(L_n)
P = eb.pn_code_generate(16, p0)
np.save('F:/audio_wm/data/p0.npy', p0)
for filepath in all_file:
    print("Embedding in " + filepath)
    aFullFilename = os.path.split(filepath)[-1]
    filename = aFullFilename.split('.')[0]
    audio, sr = librosa.load(filepath, sr=44100, mono=False)

    embedded = np.zeros_like(audio)
    ns = '841F-4483-3ABE-A5D0-E496-5B23-CFA1-2AD7'
    if audio.ndim > 1:
        for j in range(audio.shape[0]):
            y = audio[j, :]
            embedded[j, :], ns = embed(y, n_dt, ns, P, n_code=32)
    else:
Code example #25
    n_images = 1000
    data = GetData("../CycleGAN_shoes/Toy/shoes_boots_heels_white_black/")

    data_path0 = "../CycleGAN_shoes/Toy/My_interpretability/0/"
    data_path1 = "../CycleGAN_shoes/Toy/AutoEncoderByClusterDirection/0/"
    data_path2 = "../CycleGAN_shoes/Toy/PCA_AutoEncoder/0/"
    data_path3 = "../CycleGAN_shoes/Toy/PCA_GIST/0/"

    bar = progressbar.ProgressBar(maxval=n_images, \
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])

    data_paths = [data_path0, data_path1, data_path2, data_path3]
    simulators = []
    for method in range(len(data_paths)):
        simulators.append(UserSimuliator(data_paths[method], 2, 5, 3,
                                         Embedding("3Features"),
                                         "../CycleGAN_shoes/Toy/shoes_boots_heels_white_black/",
                                         Embedding("LabelToyShoes"),
                                         random.choice(data)))
    results = np.zeros([len(data_paths), n_iterations, 4])


    bar.start()
    for im in range(n_images):
        bar.update(im + 1)
        im_ = random.choice(data)
        for simulator in range(len(simulators)):
            simulators[simulator].put_chosen_picture(im_)
            for it in range(n_iterations):
                results[simulator][it] += simulators[simulator].one_iteration()
    bar.finish()

    for simulator in range(len(simulators)):
Code example #26
File: Vectorizer.py Project: RenzhiLi/S19Research
def docVec(doc):
    d = Embedding.docEncoder(doc)
    d = BiGRU.documentGRU(d)
    return d
Code example #27
import math
#%%
embedding_dim = 100
offset = 2
# %%
# read dataset and dictionary
data_train = pd.read_csv('../dataset/Train.csv')
X_train = data_train['TEXT'].values
Y_train = data_train['Label'].values
Y_train = to_categorical(Y_train)
emoji_map = pd.read_csv('../dataset/Mapping.csv')
data_test = pd.read_csv('../dataset/Test.csv')
X_test = data_test['TEXT'].values

# remove special symbols and stopwords from train set
X_rm = em.corpus_pre(X_train)

# segmentation
rm_symbols = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
tokenizer = Tokenizer(
    filters=rm_symbols, split=" ", lower=True
)  # filters: symbols to strip; lower: convert text to lowercase
tokenizer.fit_on_texts(
    X_rm
)  # fit on the cleaned train set; the vocabulary is stored on the tokenizer
l2 = math.ceil(sum([len(s.split(" ")) for s in X_rm]) / len(X_rm))
X_pd, tokenizer = mo.toknz(X_rm, l2 + offset, tokenizer)
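# word_index maps each token to a 1-based integer id, ordered by descending frequency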
ind_dict = tokenizer.word_index
# %%
X_seq = []
for sentence in X_rm:
Code example #28
            subprocess.call(["cp", im1, result_dir2 + str(index) + ".jpg"])
            index += 1


if FLAGS.create_real_pairs:
    #n_features_embedding, step, result_dir
    step = FLAGS.step
    result_dir = FLAGS.result_dir
    data_path1 = result_dir + str(2 * step) + "/"
    data_path2 = result_dir + str(2 * step + 1) + "/"

    result = result_dir + str(2 * step) + "_" + str(2 * step + 1) + "/"
    result_dir1 = result_dir + "test_AtoB_" + str(step) + "/"
    result_dir2 = result_dir + "test_BtoA_" + str(step) + "/"
    BuildPairsFromRealImages(data_path1, data_path2, result, result_dir1,
                             result_dir2, Embedding(FLAGS.embedding_name))

if FLAGS.check_ranking_for_the_clusters:

    def QualityBinaryClassifier(result):
        #result_format: [[score, truth]]
        result.sort(key=lambda x: x[0])
        result = np.array(result)
        n_ones_total = np.sum(result[:, 1])
        print(result.shape, n_ones_total)
        binary_cls_result = 0.
        n_ones_current = 0.
        for i in range(result.shape[0]):
            if (result[i][1] > 0.5):
                n_ones_current += 1.
            current_quality = (