def homses():
    """Run the alignment step and render the confirmation page.

    Reads the module-level ``Name`` and ``roll_num``, hands them to
    ``alignment.align`` together with the image folder and the combined
    ``Name + roll_num`` label, then renders ``trying.html`` with a fixed
    status message.
    """
    image_root = "training_image/"
    subject_label = Name + str(roll_num)
    alignment.align(image_root, subject_label, Name, roll_num)

    status = "Faces Are Now Aligned"
    return render_template('trying.html', prediction_text=status)
Ejemplo n.º 2
0
 def __init__(self, root, name='testing', mode='paired'):
     """Collect the ``style`` and ``input`` entries found under ``root/name``.

     Args:
         root: base directory of the data.
         name: sub-directory of ``root`` to read from.
         mode: mode string stored on the instance; when it contains
             ``'no_align'`` the two collections are passed to ``align``.
             NOTE(review): aligning only in a 'no_align' mode looks
             inverted - confirm against ``align``'s contract.
     """
     split_dir = os.path.join(root, name)
     self.feat = make_dataset(os.path.join(split_dir, 'style'))
     self.input = make_dataset(os.path.join(split_dir, 'input'))
     self.mode = mode
     self.name = name
     self.trans = make_trans()
     needs_align = 'no_align' in self.mode
     if needs_align:
         align(self.input, self.feat)
     # Report how many items the instance exposes (relies on __len__).
     print(len(self))
Ejemplo n.º 3
0
def align_wordvectors(*wvs, method="global"):
    """Align every embedding in ``wvs`` to the first one.

    Args:
        *wvs: embeddings to align; ``wvs[0]`` is the target and is returned
            unchanged as the first element of the result.
        method: ``"global"`` aligns with ``align(wv, target)``;
            ``"noise_aware"`` obtains a transform from ``noise_aware`` and
            applies it to ``wv.vectors`` in place.

    Returns:
        List with the target followed by the aligned embeddings, in input
        order.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
            Previously an unknown name silently returned the inputs
            unaligned.
    """
    if method not in ("global", "noise_aware"):
        raise ValueError(
            "Unknown alignment method %r; expected 'global' or 'noise_aware'"
            % (method,))

    target = wvs[0]
    aligned = [target]
    for wv in wvs[1:]:
        if method == "global":
            # Only the aligned source is needed; the returned target and
            # transform are discarded.
            wv, _, _ = align(wv, target)
        else:
            Q, _, _, _ = noise_aware(wv.vectors, target.vectors)
            wv.vectors = np.dot(wv.vectors, Q)
        aligned.append(wv)
    return aligned
Ejemplo n.º 4
0
def align(request, likelihood, path, name):
    """Align ``name`` and return the matched event points as a JSON response.

    Runs ``alignment.align`` with a fixed ``rel_width`` of 0.3, looks up the
    score for ``name`` and asks the alignment for its events against that
    score.  The response body is a JSON object with the keys ``audio``,
    ``score`` and ``duration``.  ``request`` is not read.
    """
    aligned = alignment.align(name, likelihood, path, rel_width=0.3)
    score = data.get_score(name)
    audio_points, score_points = aligned.get_events(score)

    payload = {
        'audio': audio_points,
        'score': score_points,
        'duration': score.length(),
    }
    return HttpResponse(json.dumps(payload))
Ejemplo n.º 5
0
def main(c1, c1_tag, c2, c2_tag, out, format):
    """Align two checklists and write a report, logging to ``<out>.log``.

    Args:
        c1, c2: inputs passed to ``cl.read_checklist`` for the "low" and
            "high" checklist respectively.
        c1_tag, c2_tag: tags; a trailing ``.`` is appended before they are
            passed to ``cl.read_checklist``.
        out: output path passed to ``write_report``; also the stem of the
            log file.
        format: report format passed through to ``write_report``.
    """
    dribpath = out + ".log"
    with open(dribpath, "w") as dribfile:
        dribble.dribble_file = dribfile
        try:
            dribble.log("\nLogging to %s" % (dribpath, ))
            A = cl.read_checklist(c1, c1_tag + ".", "low-checklist")
            B = cl.read_checklist(c2, c2_tag + ".", "high-checklist")
            dribble.log("Node counts: %s %s" %
                        (len(A.get_all_nodes()), len(B.get_all_nodes())))
            # Map each B to a corresponding A
            dribble.log("Aligning ...")
            (al, xmrcas) = alignment.align(B, A)
            dribble.log("  ... finished aligning; %s articulations\n" % len(al))
            # Where do xmrcas come from?
            write_report(A, B, al, xmrcas, format, out)
        finally:
            # Detach the logger even when a step above fails, so it never
            # keeps a reference to a file that is about to be closed.
            dribble.dribble_file = None
Ejemplo n.º 6
0
def main(_):
  """Write sliding-window image triplets and their metadata for training.

  Every ``STEPSIZE``-th file of ``<FL.input_dir>/img`` is read, scaled with
  ``img_scale`` and pushed onto a three-frame window.  For each full window
  the frames are stacked side by side and written to ``<FL.output_dir>/train``
  together with the aligned segmentation masks (``-fseg.png``) and the
  flattened camera intrinsics (``_cam.txt``).  One line per window is
  appended to ``<FL.output_dir>/train.txt``.

  Args:
    _: unused positional argument.
  """
  train_dir = os.path.join(FL.output_dir, 'train')
  # makedirs creates the output directory and train/ regardless of which one
  # already exists; the earlier nested check skipped train/ whenever the
  # output directory was already present.
  os.makedirs(train_dir, exist_ok=True)

  file_calibration = os.path.join(FL.input_dir, 'calib_cam_to_cam.txt')
  calib_camera = get_line(file_calibration, 'P_rect_02')
  imgs = sorted(os.listdir(os.path.join(FL.input_dir, 'img')))
  logging.info('Total {} images in {}'.format(len(imgs), FL.input_dir))

  ct = 1
  triplet, seg_triplet = [], []
  with open(os.path.join(FL.output_dir, 'train.txt'), 'w') as f_tr:
    for i in range(0, len(imgs), STEPSIZE):
      img_file = os.path.join(FL.input_dir, 'img', imgs[i])
      segimg_file = os.path.join(FL.input_dir, 'segimg', imgs[i].replace('.png', '-seg.png'))
      logging.info('Processing {} ...'.format(img_file))
      img = cv2.imread(img_file)
      if os.path.exists(segimg_file):
        segimg = cv2.imread(segimg_file, 0)  # read as grayscale
      else:
        # No mask on disk: fall back to an all-black mask of the image size.
        segimg = np.zeros(shape=(img.shape[0], img.shape[1]))
      img, segimg, cam_intr = img_scale(img, segimg, calib_camera)
      calib_representation = ','.join([str(c) for c in cam_intr.flatten()])
      triplet.append(img)
      seg_triplet.append(segimg)
      # if there are enough frames for a triplet
      if len(triplet) == 3:
        output_name = str(ct).zfill(10)
        cmb = np.hstack(triplet)
        align1, align2, align3 = align(seg_triplet[0], seg_triplet[1], seg_triplet[2])
        cmb_seg = np.hstack([align1, align2, align3])
        cv2.imwrite(os.path.join(train_dir, output_name + '.png'), cmb)
        cv2.imwrite(os.path.join(train_dir, output_name + '-fseg.png'), cmb_seg)
        with open(os.path.join(train_dir, output_name + '_cam.txt'), 'w') as f:
          f.write(calib_representation)
        f_tr.write('{} {}\n'.format('train', output_name))
        # Slide the window forward by one frame.
        del triplet[0]
        del seg_triplet[0]
        ct += 1
Ejemplo n.º 7
0
def run_all():
    """Align three fixed KITTI frames and write the results to OUTPUT_DIR.

    Frames 2, 3 and 4 of one hard-coded drive are loaded, passed to ``align``
    with ``threshold_same=0.1`` and the three returned images are saved as
    ``gbr1.png`` .. ``gbr3.png``.
    """
    frame_dir = ('/home/ee401_2/ferdyan_train/data/kitti_raw/2011_09_26/'
                 '2011_09_26_drive_0048_sync/image_02_new/data/')
    frame_names = ('0000000002.png', '0000000003.png', '0000000004.png')
    img1, img2, img3 = [cv2.imread(frame_dir + fname) for fname in frame_names]

    gbr1, gbr2, gbr3 = align(img1, img2, img3, threshold_same=0.1)

    outputs = (('gbr1.png', gbr1), ('gbr2.png', gbr2), ('gbr3.png', gbr3))
    for out_name, image in outputs:
        cv2.imwrite(OUTPUT_DIR + out_name, image)

    print('done')
Ejemplo n.º 8
0
def alignment(pocket, proj_direction):
    """Principal axes alignment.

    Centres the pocket coordinates on their mean, takes the eigenvectors of
    their covariance matrix ordered by decreasing eigenvalue, and asks
    ``align`` for a transformation matrix from those axes and
    ``proj_direction``.

    Returns the transformed coordinates (matrix: X*3).
    """
    coords = np.array([pocket.x, pocket.y, pocket.z]).T
    # Centralization: subtract the per-column mean.
    centered = coords - np.mean(coords, axis=0)

    covariance = np.cov(centered.T)
    eigenvalues, eigenvectors = np.linalg.eig(covariance)
    # argsort is ascending; reverse it to get the largest eigenvalue first.
    descending = np.argsort(eigenvalues)[::-1]
    principal_axes = eigenvectors[:, descending]

    rotation = align(principal_axes, proj_direction)
    return np.matmul(rotation, centered.T).T
Ejemplo n.º 9
0
    def compare_videos(self, path1, path2, write_skeleton=False, skeleton_out1='', skeleton_out2='',
                       write_aligned=False, aligned_out1='', aligned_out2='',
                       write_combined=False, combined_out=''):
        """Align two videos, process them frame by frame and finish the run.

        The videos at ``path1`` and ``path2`` are aligned with ``align``
        (which receives the ``aligned_out*`` paths and ``write_aligned``).
        Each pair of frames goes through ``self.process_image_pair``; the
        ``cvOutputData`` of both results is collected.  With
        ``write_skeleton`` the collected outputs are written to
        ``skeleton_out1`` / ``skeleton_out2``; with ``write_combined``
        ``check_alignment`` is called with ``combined_out``.

        Returns whatever ``self.dance_end()`` returns.
        """
        frames1, frames2, fps, shape1, shape2 = align(
            path1, path2,
            outpath1=aligned_out1, outpath2=aligned_out2, write=write_aligned)

        rendered1, rendered2 = [], []
        for idx in tqdm(range(len(frames1))):
            first, second = self.process_image_pair(frames1[idx], frames2[idx])
            rendered1.append(first.cvOutputData)
            rendered2.append(second.cvOutputData)

        if write_skeleton:
            jobs = ((skeleton_out1, rendered1, shape1),
                    (skeleton_out2, rendered2, shape2))
            for step, (target, rendered, shape) in enumerate(jobs, start=1):
                print('%d/2' % step)
                self.write_video(target, rendered, fps, shape)

        if write_combined:
            check_alignment(frames1, frames2, fps, shape1, shape2, combined_out)

        return self.dance_end()
Ejemplo n.º 10
0
def compare_sequences(sequence, ncbiSeq):
    """Merge a structure sequence with its NCBI counterpart.

    Both sequences are aligned with ``align``.  Where the aligned structure
    sequence has a gap (``-``) the NCBI residue is used in lowercase;
    everywhere else the structure residue is kept as it is.

    Returns the merged string, or ``-1`` when ``align`` reports failure by
    returning ``False``.
    """
    aligned = align(sequence, ncbiSeq)
    # Propagate errors from the alignment process.
    if aligned is False:
        return -1

    seq, ncbiSeq = aligned
    return "".join(
        ncbiSeq[pos].lower() if residue == "-" else residue
        for pos, residue in enumerate(seq)
    )
Ejemplo n.º 11
0
def main():
    """
    Runs main experiments using self supervised alignment.

    Loads two word-vector files, restricts them to their common vocabulary,
    learns landmarks with ``s4``, re-aligns on those landmarks and plots the
    cosine values of landmark and non-landmark words as two distributions.
    """
    # Alternative corpora, kept for quick switching:
    # wv_source = "wordvectors/latin/corpus1/0.vec"
    # wv_target = "wordvectors/latin/corpus2/0.vec"
    # wv_source = "wordvectors/source/theguardianuk.vec"
    # wv_target = "wordvectors/source/thenewyorktimes_1.vec"
    wv_source = "wordvectors/semeval/latin-corpus1.vec"
    wv_target = "wordvectors/semeval/latin-corpus2.vec"
    # wv_source = "wordvectors/usuk/bnc.vec"
    # wv_target = "wordvectors/usuk/coca_mag.vec"
    # wv_source = "wordvectors/artificial/NYT-0.vec"
    # wv_target = "wordvectors/artificial/NYT-500_random.vec"
    plt.style.use("seaborn")

    # Read WordVectors
    normalized = False
    wv1 = WordVectors(input_file=wv_source, normalized=normalized)
    wv2 = WordVectors(input_file=wv_target, normalized=normalized)

    wv1, wv2 = intersection(wv1, wv2)

    landmarks, non_landmarks, Q = s4(wv1,
                                     wv2,
                                     cls_model="nn",
                                     n_targets=100,
                                     n_negatives=100,
                                     rate=1,
                                     t=0.5,
                                     iters=100,
                                     verbose=1,
                                     plot=1)
    wv1, wv2, Q = align(wv1, wv2, anchor_words=landmarks)
    d_l = [cosine(wv1[w], wv2[w]) for w in landmarks]
    d_n = [cosine(wv1[w], wv2[w]) for w in non_landmarks]
    # Label both curves: plt.legend() has nothing to show for unlabelled
    # artists, which left the legend empty before.
    sns.distplot(d_l, color="blue", label="landmarks")
    sns.distplot(d_n, color="red", label="non-landmarks")
    plt.legend()
    plt.show()
Ejemplo n.º 12
0
def main():
    """
    The following experiments are available:
        - Find most stable words in each ArXiv category (cs, math, cond-mat, physics)
        - Find most unstable (changed) words in each category
        - Finds stable/unstable words across categories
        - Using different alignment strategies

    Command line arguments are the two word-vector files to compare. The
    per-word results of the global, noise-aware and self-supervised
    alignments are written to ``results/arxiv/<cat1>-<cat2>.csv``.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("cat1", type=str, help="Name of first arXiv category")
    parser.add_argument("cat2", type=str, help="Name of second arXiv category")

    args = parser.parse_args()

    cat1 = args.cat1
    cat2 = args.cat2

    cat1_name = cat1.split("/")[-1]
    cat2_name = cat2.split("/")[-1]

    # cat1_name = cat1.split("_")[2].rstrip(".vec")
    # cat2_name = cat2.split("_")[2].rstrip(".vec")

    path_out = "results/arxiv/"
    # The CSV is opened for writing below; make sure its directory exists
    # instead of failing after all alignments have already been computed.
    os.makedirs(path_out, exist_ok=True)

    wva = WordVectors(input_file=cat1)
    wvb = WordVectors(input_file=cat2)
    wva, wvb = intersection(wva, wvb)
    wva, wvb, Q = align(wva, wvb)
    words = wva.words

    print("-- Common vocab", len(words))
    # each column of this matrix will store a set of results for a method
    # NOTE(review): only columns 0-2 are filled below; the "top"/"bot"
    # columns (3 and 4) are written out as zeros - confirm whether intended.
    out_grid = np.zeros((len(words), 5))

    d = distribution_of_change(wva, wvb)
    print("====== GLOBAL")
    print("=> landmarks", len(wva.words))
    print_table(d, wva.words)
    out_grid[:, 0] = d  # add first column

    print("====== Noise Aware")

    Q, alpha, landmarks, noisy = noise_aware(wva.vectors, wvb.vectors)
    wva, wvb, Q = align(wva, wvb, anchor_words=landmarks)
    print("=> landmarks", len(landmarks))
    d = distribution_of_change(wva, wvb)
    print_table(d, wva.words)
    out_grid[:, 1] = d  # add new column

    print("===== SELF")
    landmarks, nonl, Q = s4(wva, wvb, iters=100, verbose=1)
    wva, wvb, Q = align(wva, wvb, anchor_words=landmarks)
    d = distribution_of_change(wva, wvb)
    print_table(d, wva.words)
    out_grid[:, 2] = d  # last column

    # WRITE-OUT
    with open(os.path.join(path_out, "%s-%s.csv" % (cat1_name, cat2_name)),
              "w") as fout:
        fout.write("word,global,noise-aware,self,top,bot\n")
        for i, w in enumerate(words):
            fout.write("%s,%.3f,%.3f,%.3f,%.3f,%.3f\n" %
                       (w, out_grid[i][0], out_grid[i][1], out_grid[i][2],
                        out_grid[i][3], out_grid[i][4]))
Ejemplo n.º 13
0
                    ORIGINAL_HEIGHT, ORIGINAL_WIDTH, _ = img0.shape

                    zoom_x = WIDTH/ORIGINAL_WIDTH
                    zoom_y = HEIGHT/ORIGINAL_HEIGHT

                    # Adjust intrinsics.
                    calib_current = calib_camera.copy()
                    calib_current[0, 0] *= zoom_x
                    calib_current[0, 2] *= zoom_x
                    calib_current[1, 1] *= zoom_y
                    calib_current[1, 2] *= zoom_y

                    calib_representation = ','.join([str(c) for c in calib_current.flatten()])

                    if wrt == 3:
                        img0, img1, img2 = align(img0, img1, img2, threshold_same=0.5)
                        img0 = cv2.resize(img0, (WIDTH, HEIGHT))
                        img1 = cv2.resize(img1, (WIDTH, HEIGHT))
                        img2 = cv2.resize(img2, (WIDTH, HEIGHT))

                        big_img[:,0*WIDTH:(0+1)*WIDTH] = img0
                        big_img[:,1*WIDTH:(1+1)*WIDTH] = img1
                        big_img[:,2*WIDTH:(2+1)*WIDTH] = img2

                imgnum = imgnum[6:]
                print("big_img = ", big_img.shape)
                # big_imgg = cv2.cvtColor(big_img, cv2.COLOR_BGR2GRAY)

                # Tes aing
                print("1 = ", OUTPUT_DIR)
                print("2 = ", seqname)
Ejemplo n.º 14
0
def run_all():
    """Convert image sequences into aligned three-frame training samples.

    For every location folder under
    ``INPUT_DIR/leftImg8bit_sequence/SUB_FOLDER`` the PNG files are grouped
    into runs of consecutive frame numbers.  Each run gets its own output
    directory; every ``SKIP``-th frame is cropped with ``crop`` and pushed
    onto a three-frame window.  For each full window the stacked frames, the
    stacked aligned masks (``-fseg.png``) and the camera intrinsics
    (``_cam.txt``) are written.

    A run without any camera file is skipped.  Previously it silently reused
    the intrinsics of the preceding run, or raised ``NameError`` when it was
    the first one.
    """
    dir_name = INPUT_DIR + '/leftImg8bit_sequence/' + SUB_FOLDER + '/*'
    print('Processing directory', dir_name)
    for location in glob.glob(dir_name):
        location_name = os.path.basename(location)
        print('Processing location', location_name)
        files = sorted(glob.glob(location + '/*.png'))
        files = [file for file in files if '-seg.png' not in file]
        # Break down into sequences: a new sequence starts whenever the
        # sequence id changes or the frame number is not the successor of
        # the previous one.
        sequences = {}
        seq_nr = 0
        last_seq = ''
        last_imgnr = -1

        for path in files:
            parts = os.path.basename(path).split('_')
            seq = parts[1]
            nr = int(parts[2])
            if seq != last_seq or last_imgnr + 1 != nr:
                seq_nr += 1
            last_imgnr = nr
            last_seq = seq
            sequences.setdefault(seq_nr, []).append(path)

        for (k, v) in sequences.items():
            print('Processing sequence', k, 'with', len(v), 'elements...')
            output_dir = OUTPUT_DIR + '/' + location_name + '_' + str(k)
            os.makedirs(output_dir, exist_ok=True)
            seq_files = sorted(v)
            triplet = []
            seg_triplet = []
            ct = 1

            # Find applicable intrinsics. Every frame of the sequence is
            # checked; the last camera file found wins.
            intrinsics = None
            for path in seq_files:
                parts = os.path.basename(path).split('_')
                osegname = parts[1]
                oimgnr = parts[2]
                applicable_intrinsics = INPUT_DIR + '/camera/' + SUB_FOLDER + '/' + location_name + '/' + location_name + '_' + osegname + '_' + oimgnr + '_camera.json'
                if os.path.isfile(applicable_intrinsics):
                    with open(applicable_intrinsics, 'r') as f:
                        lines = [line.rstrip() for line in f]
                    # The four values are read from fixed line positions
                    # (0-based lines 11-14) of the camera file.
                    intrinsics = tuple(
                        float(lines[row].split(': ')[1].replace(',', ''))
                        for row in (11, 12, 13, 14))

            if intrinsics is None:
                print('No camera intrinsics found for sequence', k, '- skipping')
                continue
            fx, fy, cx, cy = intrinsics

            for j in range(0, len(seq_files), SKIP):
                img = cv2.imread(seq_files[j])
                seg_path = INPUT_DIR + '/mask/' + SUB_FOLDER + '/' + location_name + '/' + os.path.basename(
                    seq_files[j]).replace('leftImg8bit.png', 'gtFine_color.png')
                segimg = cv2.imread(seg_path)
                smallimg, segimg, fx_this, fy_this, cx_this, cy_this = crop(img, segimg, fx, fy, cx, cy)
                triplet.append(smallimg)
                seg_triplet.append(segimg)
                if len(triplet) == 3:
                    output_name = str(ct).zfill(10)
                    cmb = np.hstack(triplet)
                    align1, align2, align3 = align(seg_triplet[0], seg_triplet[1], seg_triplet[2])
                    cmb_seg = np.hstack([align1, align2, align3])
                    cv2.imwrite(os.path.join(output_dir, output_name + '.png'), cmb)
                    cv2.imwrite(os.path.join(output_dir, output_name + '-fseg.png'), cmb_seg)
                    with open(os.path.join(output_dir, output_name + '_cam.txt'), 'w') as f:
                        f.write(str(fx_this) + ',0.0,' + str(cx_this) + ',0.0,' + str(fy_this) + ',' + str(
                            cy_this) + ',0.0,0.0,1.0')
                    # Slide the window forward by one frame.
                    del triplet[0]
                    del seg_triplet[0]
                    ct += 1
Ejemplo n.º 15
0
def s4(wv1,
       wv2,
       verbose=0,
       plot=0,
       cls_model="nn",
       iters=100,
       n_targets=10,
       n_negatives=10,
       fast=True,
       rate=0,
       t=0.5,
       t_overlap=1,
       landmarks=None,
       update_landmarks=True,
       return_model=False,
       debug=False):
    """
    Performs self-supervised learning of semantic change.
    Generates negative samples by sampling from landmarks.
    Generates positive samples via simulation of semantic change on random non-landmark words.
    Trains a classifier, fine-tune it across multiple iterations.
    If update_landmarks is True, then it learns landmarks from that step. In this case,
    the returned values are landmarks, non_landmarks, Q (transform matrix)
    Otherwise, landmarks are fixed from a starting set and the returned value
    is the learned classifier - landmarks must be passed.
    Arguments:
        wv1, wv2    - input WordVectors - required to be intersected before call
        verbose     - truthy: display log, falsy: quiet
        plot        - 1: plot functions in the end 0: do not plot
        cls_model   - classification model to use {"nn", "svm_auto", "svm_features"}
        iters       - max no. of iterations
        n_targets   - number of positive samples to generate
        n_negatives - number of negative samples
        fast        - use fast semantic change simulation (not read by this function)
        rate        - rate of semantic change injection
        t           - classification threshold (0.5)
        t_overlap   - overlap threshold for (stop criterion)
        landmarks   - list of words to use as landmarks (classification only)
        update_landmarks - if True, learns landmarks. Otherwise, learns classification model.
        return_model - if True (and update_landmarks is True), also return the classifier
        debug       - toggles debugging mode on/off. Provides reports on several metrics. Slower.
    Returns:
        if update_landmarks is True:
            landmarks - list of landmark words
            non_landmarks - list of non_landmark words
            Q           - transformation matrix for procrustes alignment
            model       - binary classifier (only when return_model is True)
        if update_landmarks is False:
            model       - binary classifier
    Raises:
        ValueError  - if update_landmarks is False and no landmarks are given
    """

    # Define verbose prints. Any truthy value enables logging so that a
    # value other than 0/1 no longer leaves verbose_print undefined.
    if verbose:

        def verbose_print(*s, end="\n"):
            print(*s, end=end)
    else:

        def verbose_print(*s, end="\n"):
            return None

    # Untouched copy of the second embedding; it is the one re-aligned at
    # the end of every iteration.
    wv2_original = WordVectors(words=wv2.words, vectors=wv2.vectors.copy())

    # number of iterations to use in running average
    # (0 means the slice [-0:] covers the whole history)
    avg_window = 0

    # Begin alignment
    if update_landmarks:
        # Check if landmarks is initialized
        if landmarks is None:
            wv1, wv2, Q = align(wv1, wv2)  # start from global alignment
            landmark_dists = [
                euclidean(u, v) for u, v in zip(wv1.vectors, wv2.vectors)
            ]
            landmark_args = np.argsort(landmark_dists)
            # Seed with the closest half of the vocabulary.
            landmarks = [
                wv1.words[i] for i in landmark_args[:int(len(wv1.words) * 0.5)]
            ]
            # landmarks = np.random.choice(wv1.words, int(len(wv1)*0.5))
        landmark_set = set(landmarks)
        non_landmarks = np.array(
            [w for w in wv1.words if w not in landmark_set])
    else:
        if landmarks is None:
            raise ValueError(
                "landmarks must be passed when update_landmarks is False")
        landmark_set = set(landmarks)
        non_landmarks = [w for w in wv1.words if w not in landmark_set]

    wv1, wv2, Q = align(wv1, wv2, anchor_words=landmarks)

    if cls_model == "nn":
        model = build_keras_model(wv1.dimension * 2)
    elif cls_model == "svm_auto" or cls_model == "svm_features":
        model = build_sklearn_model()  # get SVC

    landmark_hist = list()  # store no. of landmark history
    loss_hist = list()  # store self-supervision loss history
    alignment_loss_hist = list()  # store landmark alignment loss
    alignment_out_hist = list()  # store alignment loss outside of lm
    alignment_all_hist = list()

    cumulative_out_hist = list()
    cumulative_alignment_hist = list()  # store cumulative loss alignment
    overlap_hist = list()  # store landmark overlap history
    cumulative_overlap_hist = list()  # mean overlap history

    # History of cosines
    cos_loss_in_hist = list()
    cos_loss_out_hist = list()
    cumulative_cos_in = list()
    cumulative_cos_out = list()

    prev_landmarks = set(landmarks)
    for it in range(iters):

        pos_samples = list()
        pos_vectors = dict()

        # Randomly sample words to inject change to
        # If no word is flagged as non_landmarks, sample from all words
        # In practice, this should never occur when selecting landmarks
        # but only for classification when aligning on all words
        if len(non_landmarks) > 0:
            targets = np.random.choice(non_landmarks, n_targets)
            # Make targets deterministic
            #targets = non_landmarks
        else:
            targets = np.random.choice(wv1.words, n_targets)

        for target in targets:

            # Simulate semantic change in target word
            v = inject_change_single(wv2_original, target, wv1.words,
                                     wv1[target], rate)

            pos_vectors[target] = v

            pos_samples.append(target)
        # Convert to numpy array
        pos_samples = np.array(pos_samples)
        # Get negative samples from landmarks
        neg_samples = negative_samples(landmarks, n_negatives, p=None)
        neg_vectors = {w: wv2_original[w] for w in neg_samples}
        # Create dictionary of supervision samples (positive and negative)
        # Mapping word -> vector
        sup_vectors = {**neg_vectors, **pos_vectors}

        # Prepare training data
        words_train = np.concatenate((pos_samples, neg_samples))
        # assign labels to positive and negative samples
        y_train = [1] * len(pos_samples) + [0] * len(neg_samples)

        # Stack columns to shuffle data and labels together
        train = np.column_stack((words_train, y_train))
        # Shuffle batch
        np.random.shuffle(train)
        # Detach data and labels
        words_train = train[:, 0]
        y_train = train[:, -1].astype(int)

        x_train = np.array(
            [np.append(wv1[w], sup_vectors[w]) for w in words_train])

        # Append history
        landmark_hist.append(len(landmarks))
        v1_land = np.array([wv1[w] for w in landmarks])
        v2_land = np.array([wv2_original[w] for w in landmarks])
        v1_out = np.array([wv1[w] for w in non_landmarks])
        v2_out = np.array([wv2_original[w] for w in non_landmarks])

        alignment_loss = np.linalg.norm(v1_land - v2_land)**2 / len(v1_land)
        alignment_loss_hist.append(alignment_loss)
        cumulative_alignment_hist.append(
            np.mean(alignment_loss_hist[-avg_window:]))

        # out loss
        alignment_out_loss = np.linalg.norm(v1_out - v2_out)**2 / len(v1_out)
        alignment_out_hist.append(alignment_out_loss)
        cumulative_out_hist.append(np.mean(alignment_out_hist[-avg_window:]))

        # all loss
        alignment_all_loss = np.linalg.norm(wv1.vectors -
                                            wv2_original.vectors)**2 / len(
                                                wv1.words)
        alignment_all_hist.append(alignment_all_loss)

        if debug:
            # cosine loss
            cos_in = np.mean([cosine(u, v) for u, v in zip(v1_land, v2_land)])
            cos_out = np.mean([cosine(u, v) for u, v in zip(v1_out, v2_out)])
            cos_loss_in_hist.append(cos_in)
            cos_loss_out_hist.append(cos_out)
            cumulative_cos_in.append(np.mean(cos_loss_in_hist))
            cumulative_cos_out.append(np.mean(cos_loss_out_hist))

        # Begin training of neural network
        if cls_model == "nn":
            history = model.train_on_batch(x_train,
                                           y_train,
                                           reset_metrics=False)
            # history = model.fit(x_train, y_train, epochs=5, verbose=0)
            # history = [history.history["loss"][0]]
        elif cls_model == "svm_auto":
            model.fit(x_train, y_train)
            pred_train = model.predict_proba(x_train)
            history = [log_loss(y_train, pred_train)]
        elif cls_model == "svm_features":
            x_train_ = get_features(x_train)  # retrieve manual features
            model.fit(x_train_, y_train)
            pred_train = model.predict_proba(x_train_)
            y_hat_t = (pred_train[:, 0] > 0.5)
            acc_t = accuracy_score(y_train, y_hat_t)
            history = [log_loss(y_train, pred_train), acc_t]

        # Normalise to a 1-D array: depending on the branch above this is a
        # scalar loss, [loss] or [loss, acc]. Accuracy is reported as NaN
        # when the model did not provide one (previously an IndexError for
        # "svm_auto").
        history = np.atleast_1d(history)
        loss = history[0]
        acc = history[1] if len(history) > 1 else float("nan")
        loss_hist.append(loss)

        # Apply model on original data to select landmarks
        x_real = np.array([
            np.append(u, v) for u, v in zip(wv1.vectors, wv2_original.vectors)
        ])
        if cls_model == "nn":
            predict_real = model.predict(x_real)
        elif cls_model == "svm_auto":
            predict_real = model.predict_proba(x_real)
            predict_real = predict_real[:, 1]
        elif cls_model == "svm_features":
            x_real_ = get_features(x_real)
            predict_real = model.predict_proba(x_real_)
            predict_real = predict_real[:, 1]

        if update_landmarks:
            # Words scored below the threshold become landmarks, words
            # scored above it become non-landmarks.
            landmarks = [
                wv1.words[i] for i in range(len(wv1.words))
                if predict_real[i] < t
            ]
            non_landmarks = [
                wv1.words[i] for i in range(len(wv1.words))
                if predict_real[i] > t
            ]

        # Update landmark overlap using Jaccard Index
        isect_ab = set.intersection(prev_landmarks, set(landmarks))
        union_ab = set.union(prev_landmarks, set(landmarks))
        j_index = len(isect_ab) / len(union_ab)
        overlap_hist.append(j_index)

        cumulative_overlap_hist.append(np.mean(
            overlap_hist[-avg_window:]))  # store mean

        prev_landmarks = set(landmarks)

        verbose_print(
            "> %3d | L %4d | l(in): %.2f | l(out): %.2f | loss: %.2f | overlap %.2f | acc: %.2f"
            % (it, len(landmarks), cumulative_alignment_hist[-1],
               cumulative_out_hist[-1], loss,
               cumulative_overlap_hist[-1], acc),
            end="\r")

        wv1, wv2_original, Q = align(wv1, wv2_original, anchor_words=landmarks)

        # Check if overlap difference is below threshold
        if np.mean(overlap_hist) > t_overlap:
            break

    # Print new line
    verbose_print()

    if plot == 1:
        # One history entry is appended per completed iteration, so this is
        # the number of iterations actually run (also valid when iters == 0).
        n_done = len(landmark_hist)
        plt.plot(range(n_done), landmark_hist, label="landmarks")
        plt.hlines(len(wv1.words), 0, n_done, colors="red")
        plt.ylabel("No. of landmarks")
        plt.xlabel("Iteration")
        plt.show()
        plt.plot(range(n_done), loss_hist, c="red", label="loss")
        plt.ylabel("Loss (binary crossentropy)")
        plt.xlabel("Iteration")
        plt.legend()
        plt.show()
        plt.plot(range(n_done),
                 cumulative_alignment_hist,
                 label="in (landmarks)")
        plt.plot(range(n_done), cumulative_out_hist, label="out")
        plt.plot(range(n_done), alignment_all_hist, label="all")
        plt.ylabel("Alignment loss (MSE)")
        plt.xlabel("Iteration")
        plt.legend()
        plt.show()

        if debug:
            plt.plot(range(n_done), cumulative_cos_in, label="cos in")
            plt.plot(range(n_done), cumulative_cos_out, label="cos out")
            plt.legend()
            plt.show()

        plt.plot(range(n_done), cumulative_overlap_hist, label="overlap")

        plt.ylabel("Jaccard Index", fontsize=16)
        plt.xlabel("Iteration", fontsize=16)
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=16)
        # plt.legend()
        plt.tight_layout()
        plt.savefig("overlap.pdf", format="pdf")
        #plt.show()

    if update_landmarks:
        if not return_model:
            return landmarks, non_landmarks, Q
        else:
            return landmarks, non_landmarks, Q, model
    else:
        return model
Ejemplo n.º 16
0
def run_all():
    """Write stacked three-frame samples for every location directory.

    Every directory matched by
    ``INPUT_DIR/leftImg8bit_sequence/SUB_FOLDER/*`` is scanned for PNG frames
    (files containing ``-seg.png`` are excluded).  Frames are grouped into
    runs in which the sequence id stays the same and the frame number
    increases by exactly one.  Each run is then walked with stride ``SKIP``;
    for every sliding window of three cropped frames the function writes, to
    ``OUTPUT_DIR/<location>_<run>/``:

    * ``<counter>.png``       -- the three frames stacked horizontally,
    * ``<counter>-fseg.png``  -- the three masks returned by ``align``,
    * ``<counter>_cam.txt``   -- the intrinsics returned by ``crop`` as a
      flattened 3x3 matrix.

    A run for which no ``*_camera.json`` file exists is skipped with a
    message; previously it either raised ``NameError`` or silently reused
    the intrinsics read for an earlier run.
    """
    dir_name = INPUT_DIR + '/leftImg8bit_sequence/' + SUB_FOLDER + '/*'
    print('Processing directory', dir_name)
    for location in glob.glob(dir_name):
        location_name = os.path.basename(location)
        print('Processing location', location_name)
        files = sorted(glob.glob(location + '/*.png'))
        files = [file for file in files if '-seg.png' not in file]

        # Break down into sequences: a new run starts whenever the sequence
        # id changes or the frame numbers stop being consecutive.
        sequences = {}
        seq_nr = 0
        last_seq = ''
        last_imgnr = -1
        for path in files:
            parts = os.path.basename(path).split('_')
            seq = parts[1]
            nr = int(parts[2])
            if seq != last_seq or last_imgnr + 1 != nr:
                seq_nr += 1
            last_imgnr = nr
            last_seq = seq
            sequences.setdefault(seq_nr, []).append(path)

        for (k, v) in sequences.items():
            print('Processing sequence', k, 'with', len(v), 'elements...')
            seq_files = sorted(v)

            # Find applicable intrinsics.  All frames are checked and the
            # last camera file found wins, as in the original loop.
            intrinsics = None
            for path in seq_files:
                parts = os.path.basename(path).split('_')
                osegname = parts[1]
                oimgnr = parts[2]
                applicable_intrinsics = (
                    INPUT_DIR + '/camera/' + SUB_FOLDER + '/' +
                    location_name + '/' + location_name + '_' + osegname +
                    '_' + oimgnr + '_camera.json')
                if os.path.isfile(applicable_intrinsics):
                    with open(applicable_intrinsics, 'r') as f:
                        lines = [line.rstrip() for line in f]
                    # The file is parsed positionally: lines 11-14 are
                    # expected to hold "<key>: <value>," entries for
                    # fx, fy, cx, cy in that order.
                    intrinsics = [
                        float(lines[idx].split(': ')[1].replace(',', ''))
                        for idx in (11, 12, 13, 14)
                    ]

            if intrinsics is None:
                print('No camera intrinsics found for sequence', k,
                      '- skipping')
                continue
            fx, fy, cx, cy = intrinsics

            output_dir = OUTPUT_DIR + '/' + location_name + '_' + str(k)
            if not os.path.isdir(output_dir):
                # makedirs also creates OUTPUT_DIR itself when missing.
                os.makedirs(output_dir)

            triplet = []
            seg_triplet = []
            ct = 1
            for j in range(0, len(seq_files), SKIP):
                img = cv2.imread(seq_files[j])
                segimg = cv2.imread(seq_files[j].replace('.png', '-seg.png'))

                smallimg, segimg, fx_this, fy_this, cx_this, cy_this = crop(
                    img, segimg, fx, fy, cx, cy)
                triplet.append(smallimg)
                seg_triplet.append(segimg)
                if len(triplet) == 3:
                    cmb = np.hstack(triplet)
                    align1, align2, align3 = align(
                        seg_triplet[0], seg_triplet[1], seg_triplet[2])
                    cmb_seg = np.hstack([align1, align2, align3])
                    stem = os.path.join(output_dir, str(ct).zfill(10))
                    cv2.imwrite(stem + '.png', cmb)
                    cv2.imwrite(stem + '-fseg.png', cmb_seg)
                    with open(stem + '_cam.txt', 'w') as f:
                        f.write(str(fx_this) + ',0.0,' + str(cx_this) +
                                ',0.0,' + str(fy_this) + ',' + str(cy_this) +
                                ',0.0,0.0,1.0')
                    # Slide the window forward by one frame.
                    del triplet[0]
                    del seg_triplet[0]
                    ct += 1
Ejemplo n.º 17
0

# Regenerate the three JSON artifacts when ko() flags any of them
# (presumably "file missing or unusable" -- TODO confirm against ko()).
if ko(strains_file) or ko(aligned_file) or ko(samples_file):
    base_strains = bs.base_strains("../../data/Borrelia/MLST_19032019")
    # base_strains = base_strains[:22]

    samples = s.samples(base_strains)

    # Gather the strains found in element [1] of every sample, keyed by id
    # so that a strain occurring in several samples is kept only once.
    more_strains = {}
    for sample in samples:
        for strain in sample[1]:
            more_strains[strain.id] = strain
    more_strains = list(more_strains.values())

    strains = base_strains + more_strains
    aligned = al.align(strains)

    # Persist everything as JSON; aligned values are stored via str().
    os.makedirs(root_dir, exist_ok=True)
    # NOTE: the lambda parameter `s` shadows the module-level name `s`
    # only inside the lambda itself.
    w.write(strains_file, w.json(list(map(lambda s: s.to_json(), strains))))
    w.write(aligned_file, w.json({k: str(v) for k, v in aligned.items()}))
    w.write(samples_file, w.json(list(map(s.to_json, samples))))

# The data is always (re)loaded from the files, even right after it has been
# written above, so both paths end up with objects built from the JSON.
strains_json = r.read(strains_file, r.json)
aligned_json = r.read(aligned_file, r.json)
samples_json = r.read(samples_file, r.json)

strains = list(map(Strain.from_json, strains_json))
aligned = {k: Seq(v) for k, v in aligned_json.items()}
samples = list(map(s.from_json, samples_json))

for sample in samples:
Ejemplo n.º 18
0
                    img = cv2.resize(img, (WIDTH, HEIGHT))

                    # Remove NaN and inf values
                    img = np.nan_to_num(img)
                    img[img > 255] = 255
                    img[img < 0] = 0

                    big_img[:, wct * WIDTH:(wct + 1) * WIDTH] = img
                    wct += 1

                    # Generate seg_mask and add to list
                    seg_list.append(mask_generator.generate_seg_img(img))
                    # mask_generator.visualize()

                # Align seg_masks
                seg_list[0], seg_list[1], seg_list[2] = align(
                    seg_list[0], seg_list[1], seg_list[2])
                big_seg_img = np.zeros(shape=(HEIGHT, WIDTH * SEQ_LENGTH, 3))

                # Create seg_mask triplet
                # for k in range(0, len(seg_list)):
                #     big_seg_img[:, k * WIDTH:(k + 1) * WIDTH] = seg_list[k]
                #
                # # Remove NaN and inf values
                # big_seg_img = np.nan_to_num(big_seg_img)
                # big_seg_img[big_seg_img > 255] = 255
                # big_seg_img[big_seg_img < 0] = 0
                #
                # if True in np.isnan(big_seg_img):
                #     print("ERROR: Infinite values from seg image!")
                #     nan_check = True
                # if True in np.isinf(big_seg_img):
Ejemplo n.º 19
0
def main():
    """
    Performs tests on SemEval2020-Task 1 data on Unsupervised Lexical Semantic Change Detection.
    This experiment is designed to evaluate the performance of different landmark selection approaches,
    showing how the classification performance is affected by the landmark choices.

    Command-line options:
        --languages  languages to evaluate (default: english, german, latin, swedish)
        --cls        classifier applied to the aligned vectors:
                     "cosine", "s4" or "cosine-auto" (default: "cosine")

    For every language and every alignment method the two corpora are
    aligned on the selected landmarks, the target words are classified, and
    a Markdown table with the mean and maximum accuracy is printed.
    """
    np.random.seed(1)

    align_methods = [
        "s4", "noise-aware", "top-10", "bot-10", "global", "top-5", "bot-5"
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument("--languages",
                        nargs="+",
                        help="Languages to use",
                        default=["english", "german", "latin", "swedish"])
    parser.add_argument("--cls",
                        choices=["cosine", "s4", "cosine-auto"],
                        default="cosine",
                        help="Classifier to use")

    args = parser.parse_args()
    languages = args.languages
    classifier = args.cls

    # Per-language keyword arguments for s4() when it selects landmarks.
    align_params = \
    {
        "english" : {
            "n_targets": 100,
            "n_negatives": 50,
            "rate": 1,
            "iters": 100
        },
        "german" : {
            "n_targets": 100,
            "n_negatives": 200,
            "rate": 1,
            "iters": 100
        },
        "latin" : {
            "n_targets": 10,
            "n_negatives": 4,
            "rate": 0.5,
            "iters": 100
        },
        "swedish" : {
            "n_targets": 100,
            "n_negatives": 200,
            "rate": 1,
            "iters": 100
        }
    }

    # Per-language keyword arguments for s4() when it is used as classifier.
    cls_params = \
    {
        "english": {
            "n_targets": 100,
            "n_negatives": 50,
            "rate": 1,
            "iters": 500
        },
        "german":{
            "n_targets": 50,
            "n_negatives": 200
        },
        "latin":
        {
            "n_targets": 50,
            "n_negatives": 10
        },
        "swedish":
        {
            "n_targets": 120,
            "n_negatives": 120
        }
    }

    # Per-language keyword arguments for threshold_crossvalidation().
    auto_params = \
    {
        "english":
            {
            "rate": 1.5,
            "n_fold": 1,
            "n_targets": 50,
            "n_negatives": 100
            },
        "german":
        {
            "rate":1,
            "n_fold": 1,
            "n_targets": 200,
            "n_negatives": 100
        },
        "latin":
        {
            "rate": 1,
            "n_targets": 100,
            "n_negatives": 15
        },
        "swedish":
        {
            "rate": 1,
            "n_targets": 100,
            "n_negatives": 200
        }
    }

    normalized = False
    accuracies = defaultdict(dict)
    true_positives = defaultdict(dict)
    false_negatives = defaultdict(dict)
    correct_ans = defaultdict(dict)
    cm = defaultdict(dict)
    for lang in languages:
        # print("---")
        # print(lang)
        thresholds = np.arange(0.1, 1, 0.1)
        # NOTE(review): both tasks read the very same file, so the "ranking"
        # below equals the binary labels -- confirm whether task 2 should
        # point at a separate graded-truth file.
        path_task1 = "data/semeval/truth/%s.txt" % lang
        path_task2 = "data/semeval/truth/%s.txt" % lang

        # Each line is "<target>\t<value>".
        with open(path_task1) as fin:
            targets, true_class = zip(
                *(line.strip().split("\t") for line in fin))
            y_true = np.array(true_class, dtype=int)
        with open(path_task2) as fin:
            _, true_ranking = zip(
                *(line.strip().split("\t") for line in fin))
            true_ranking = np.array(true_ranking, dtype=float)

        corpus1_path = "wordvectors/semeval/%s-corpus1.vec" % lang
        corpus2_path = "wordvectors/semeval/%s-corpus2.vec" % lang
        wv1 = WordVectors(input_file=corpus1_path, normalized=normalized)
        wv2 = WordVectors(input_file=corpus2_path, normalized=normalized)

        c_method = defaultdict(list)
        wv1, wv2 = intersection(wv1, wv2)
        # print("Size of common vocab.", len(wv1))
        prediction = dict()  # store per-word prediction
        for align_method in align_methods:
            accuracies[align_method][lang] = list()
            true_positives[align_method][lang] = list()
            false_negatives[align_method][lang] = list()
            cm[align_method][lang] = np.zeros((2, 2))

            n_words = len(wv1.words)
            if align_method == "global":
                landmarks = wv1.words
            elif align_method == "noise-aware":
                Q, alpha, landmarks, non_landmarks = noise_aware(
                    wv1.vectors, wv2.vectors)
                landmarks = [wv1.words[i] for i in landmarks]
            elif align_method == "s4":
                landmarks, non_landmarks, Q = s4(
                    wv1,
                    wv2,
                    cls_model="nn",
                    verbose=0,
                    **align_params[lang],
                )
            # "top-N" keeps the FIRST N% of the vocabulary and "bot-N" the
            # LAST N%.  The top-N slices used to be words[k:], i.e. everything
            # except the first N%, which matched neither the method name nor
            # the bot-N branches.
            elif align_method == "top-10":
                landmarks = wv1.words[:int(n_words * 0.1)]
            elif align_method == "top-5":
                landmarks = wv1.words[:int(n_words * 0.05)]
            elif align_method == "top-50":
                landmarks = wv1.words[:int(n_words * 0.50)]
            elif align_method == "bot-10":
                landmarks = wv1.words[-int(n_words * 0.1):]
            elif align_method == "bot-5":
                landmarks = wv1.words[-int(n_words * 0.05):]
            elif align_method == "bot-50":
                landmarks = wv1.words[-int(n_words * 0.50):]

            wv1_, wv2_, Q = align(wv1, wv2, anchor_words=landmarks)

            # Cosine-based classifier
            if classifier == "cosine":
                x = np.array([cosine(wv1_[w], wv2_[w]) for w in wv1.words])
                x = get_feature_cdf(x)
                x = np.array([x[wv1.word_id[i.lower()]] for i in targets])
                p = x.reshape(-1, 1)
                r = vote(p)
                y_pred = r

                # Sweep the decision threshold and record every result; the
                # per-word correctness of the best threshold is remembered.
                best_acc = 0
                for t in thresholds:
                    y_bin = (y_pred > t)
                    correct = (y_bin == y_true)

                    accuracy = accuracy_score(y_true, y_bin)
                    if accuracy > best_acc:
                        prediction[align_method] = correct
                        best_acc = accuracy
                    tn, fp, fn, tp = confusion_matrix(y_true, y_bin).ravel()
                    cm[align_method][lang] += confusion_matrix(y_true,
                                                               y_bin,
                                                               normalize="all")
                    accuracies[align_method][lang].append(round(accuracy, 2))
                    true_positives[align_method][lang].append(round(tp, 2))
                    false_negatives[align_method][lang].append(round(fn, 2))
            elif classifier == "cosine-auto":
                t_cos = threshold_crossvalidation(wv1_,
                                                  wv2_,
                                                  iters=1,
                                                  **auto_params[lang],
                                                  landmarks=landmarks)
                x = np.array([cosine(wv1_[w], wv2_[w]) for w in wv1.words])
                x = get_feature_cdf(x)
                x = np.array([x[wv1.word_id[i.lower()]] for i in targets])
                p = x.reshape(-1, 1)
                r = vote(p)
                y_pred = r
                y_bin = y_pred > t_cos
                correct = (y_bin == y_true)

                accuracy = accuracy_score(y_true, y_bin)

                accuracies[align_method][lang].append(round(accuracy, 2))

            elif classifier == "s4":
                model = s4(wv1_,
                           wv2_,
                           landmarks=landmarks,
                           verbose=0,
                           **cls_params[lang],
                           update_landmarks=False)
                # Concatenate vectors of target words for prediction
                x = np.array([
                    np.concatenate((wv1_[target.lower()],
                                    wv2_[target.lower()]))
                    for target in targets
                ])
                y_pred = model.predict(x)
                y_bin = y_pred > 0.5
                correct = (y_bin == y_true)

                accuracy = accuracy_score(y_true, y_bin)
                print(accuracy)
                accuracies[align_method][lang].append(round(accuracy, 2))

            c_method[align_method] = y_pred
            rho, pvalue = spearmanr(true_ranking, y_pred)

            # print(lang, align_method, "acc", accuracies[align_method][lang],
            #                                 "\nranking", round(rho, 2),
            #                                 "landmarks", len(landmarks))

    print("|Method|Language|Mean acc.|Max acc.|")
    print("|------|--------|---------|--------|")
    for method in accuracies:
        print("|", method, end="|")
        for lang in accuracies[method]:
            print(lang,
                  round(np.mean(accuracies[method][lang]), 2),
                  np.max(accuracies[method][lang]),
                  sep="|",
                  end="|\n")
    print()
Ejemplo n.º 20
0
    string2 = string2.lower()
    
    dist_subs = stringdistances.substring_distance(string1, string2)
    synsets = wn.synsets(string1, wn.NOUN)
    if len(synsets) == 0:
        tokens = wordpunct_tokenize(string1)
        for token in tokens:
            synsets = wn.synsets(string1, wn.NOUN)
            if len(synsets) > 0:
                break
    if len(synsets) > 0:
        for synset in synsets:
            for lemma in synset.lemmas():
                dist = stringdistances.substring_distance(lemma.name(), string2)
                if (dist < dist_subs):
                    dist_subs = dist
    return dist_subs

# Python 2 script section (uses print statements).
# Load the two graphs that are going to be aligned.
graph1 = util.graph_from_uri('http://purl.org/dc/elements/1.1/')
graph2 = util.graph_from_uri('http://purl.org/dc/terms/')

print 'graph sizes:', len(graph1), len(graph2)
print 'num classes:', len(util.load_classes(graph1)), len(util.load_classes(graph2))

# Align the graphs with jwnl_basic_synonym_distance as the distance function
# and a threshold of 0.9 (how the threshold is applied is decided inside
# alignment.align, which is not visible here).
corr_list = alignment.align(graph1, graph2, threshold=0.9, method=jwnl_basic_synonym_distance)

print 'num correspondences:', len(corr_list)

# One line per correspondence: both entities, the relation and the measure.
for corr in corr_list:
    print corr.entity1, corr.relation, corr.entity2, corr.measure
Ejemplo n.º 21
0
def main(_):
    """Convert annotated image clips into three-frame training samples.

    Reads the camera calibration from ``FL.input_dir/calib.txt`` and the clip
    names from ``FL.input_dir/img/train_list.txt``.  For every clip, frames
    are visited with stride ``STEPSIZE``; each sliding window of three scaled
    frames is written to ``FL.output_dir/train/<clip>/`` as

    * ``<counter>.png``       -- the three frames stacked horizontally,
    * ``<counter>-fseg.png``  -- the three masks returned by ``align``,
    * ``<counter>_cam.txt``   -- the comma-joined intrinsics returned by
      ``img_scale`` for the newest frame of the window,

    and a ``train/<clip> <counter>`` line is appended to
    ``FL.output_dir/train.txt``.

    Args:
        _: unused; present only to satisfy the caller's signature.
    """
    train_dir = os.path.join(FL.output_dir, 'train')
    # Create output_dir and train/ in one step.  The previous nested check
    # only created train/ when output_dir itself was missing, so a
    # pre-existing output_dir made the per-clip mkdir below fail.
    if not os.path.isdir(train_dir):
        os.makedirs(train_dir)

    file_calibration = os.path.join(FL.input_dir, 'calib.txt')
    calib_camera = get_camera_intrinsic(file_calibration)
    file_train_list = os.path.join(FL.input_dir, 'img', 'train_list.txt')
    train_list = get_lines(file_train_list)

    # The context manager closes train.txt even if a clip fails midway.
    with open(os.path.join(FL.output_dir, 'train.txt'), 'w') as f_tr:
        for clip in train_list:
            imgs = sorted(os.listdir(os.path.join(FL.input_dir, 'img', clip)))
            logging.info('Total {} images in clip {}'.format(len(imgs), clip))
            clip_output_dir = os.path.join(train_dir, clip)
            if not os.path.isdir(clip_output_dir):
                os.makedirs(clip_output_dir)

            # initialize CVAT annotation parser for bounding boxes
            xml_file = os.path.join(FL.input_dir, 'img', clip + '.xml')
            anno_parser = cvat_anno_parser(xml_file)

            ct = 1
            triplet, seg_triplet = [], []
            for i in range(0, len(imgs), STEPSIZE):
                img_file = os.path.join(FL.input_dir, 'img', clip, imgs[i])
                logging.info('Processing {} ...'.format(img_file))
                img = cv2.imread(img_file)
                segimg = anno_parser.get_seg_map(imgs[i],
                                                 (img.shape[0], img.shape[1]),
                                                 color='gray')
                img, segimg, cam_intr = img_scale(img, segimg, calib_camera)
                calib_representation = ','.join(
                    [str(c) for c in cam_intr.flatten()])
                triplet.append(img)
                seg_triplet.append(segimg)
                # if there are enough frames for a triplet
                if len(triplet) == 3:
                    output_name = str(ct).zfill(10)
                    cmb = np.hstack(triplet)
                    align1, align2, align3 = align(seg_triplet[0],
                                                   seg_triplet[1],
                                                   seg_triplet[2])
                    cmb_seg = np.hstack([align1, align2, align3])
                    cv2.imwrite(
                        os.path.join(clip_output_dir, output_name + '.png'),
                        cmb)
                    cv2.imwrite(
                        os.path.join(clip_output_dir,
                                     output_name + '-fseg.png'), cmb_seg)
                    cam_path = os.path.join(clip_output_dir,
                                            output_name + '_cam.txt')
                    with open(cam_path, 'w') as f:
                        f.write(calib_representation)
                    f_tr.write('{} {}\n'.format(os.path.join('train', clip),
                                                output_name))
                    # Slide the window forward by one frame.
                    del triplet[0]
                    del seg_triplet[0]
                    ct += 1
Ejemplo n.º 22
0
# Landmark detector backed by the 68-point shape predictor file.
facePredictor = os.path.join(fileDir, 'shape_predictor_68_face_landmarks.dat')
alignDlib = openface.AlignDlib(facePredictor)
# NOTE: from here on `alignment` names the Alignment instance and no longer
# the `alignment` module it was created from.
alignment = alignment.Alignment(args.dim, template, delaunay.simplices)

print('processing images...')

# Process at most args.num frames from the capture device.
for index in range(args.num):

    ret, rawImage = videoCapture.read()

    # Stop as soon as the capture reports that no frame was read.
    if not ret: break
    
    boundingBox = alignDlib.getLargestFaceBoundingBox(rawImage)
    landmarks = alignDlib.findLandmarks(rawImage, boundingBox)

    alignedImage = alignment.align(rawImage, landmarks)

    convertedImage = cv2.cvtColor(alignedImage, cv2.COLOR_RGB2GRAY)

    equalizedImage = cv2.equalizeHist(convertedImage)

    markedImage = rawImage.copy()

    # Draw the three edges of every Delaunay triangle onto the copy.
    for triangle in delaunay.simplices:

        cv2.line(markedImage, landmarks[triangle[0]], landmarks[triangle[1]], (255, 0, 255))
        cv2.line(markedImage, landmarks[triangle[1]], landmarks[triangle[2]], (255, 0, 255))
        cv2.line(markedImage, landmarks[triangle[2]], landmarks[triangle[0]], (255, 0, 255))

    # Save a 200x200 crop of the raw frame centred on landmark 30
    # (presumably the nose tip in the 68-point layout -- TODO confirm).
    cv2.imwrite(os.path.join(args.dir, '..', 'raw images', str(index).zfill(3) + '.jpg'), 
        rawImage[landmarks[30][1] - 100:landmarks[30][1] + 100, landmarks[30][0] - 100:landmarks[30][0] + 100])
Ejemplo n.º 23
0
import h5py
import mmsdk
from mmsdk import mmdatasdk
from mmsdk.mmmodelsdk.fusion import TensorFusion
import numpy
import pickle
from random import shuffle
import time


# Load the Social-IQ data.
# Yellow warnings from the SDK are expected and can be ignored.
if not os.path.isdir("./deployed/"):
	# The aligned ("deployed") features do not exist yet: build them first.
	print("Need to run the modality alignment first")
	from alignment import align, myavg
	align()

# Computational sequences that make up the dataset, keyed by modality name.
paths = {
	"QA_BERT_lastlayer_binarychoice": "./socialiq/SOCIAL-IQ_QA_BERT_LASTLAYER_BINARY_CHOICE.csd",
	"DENSENET161_1FPS": "./deployed/DENSENET161_1FPS.csd",
	"Transcript_Raw_Chunks_BERT": "./deployed/Transcript_Raw_Chunks_BERT.csd",
	"Acoustic": "./deployed/Acoustic.csd",
}
social_iq = mmdatasdk.mmdataset(paths)
social_iq.unify()




def qai_to_tensor(in_put,keys,total_i=1):
	data=dict(in_put.data)
	features=[]
Ejemplo n.º 24
0
def main():
    """Evaluate UK/US word-pair classification for one alignment method.

    Command line:
        alignment   landmark selection used to align the UK vectors to the
                    US vectors (top-5, top-10, noise-aware, bot-5, bot-10,
                    global or s4)
        --rounds    number of times the classifiers are trained and scored

    The BNC (UK) and COCA (US) vectors are intersected and aligned on the
    chosen landmarks.  Word pairs from the "similar" and "dissimilar"
    dictionaries (labels 0 and 1) are then classified by an s4 model, by
    cosine thresholds and by noise-aware, and the scores (accuracy,
    precision, recall, F1) are printed as Markdown table rows.
    """
    parser = argparse.ArgumentParser()
    # Required positional argument: argparse never applies a default to it,
    # so the former default="top" (not even a valid choice) was dropped.
    parser.add_argument("alignment",
                        choices=[
                            'top-5', 'top-10', 'noise-aware', 'bot-5',
                            'bot-10', 'global', 's4'
                        ],
                        help="Method to use in the alignment of UK to US")
    parser.add_argument("--rounds",
                        type=int,
                        default=1,
                        help="No. of rounds to run the classifications")

    args = parser.parse_args()

    path_us = "wordvectors/ukus/coca.vec"
    path_uk = "wordvectors/ukus/bnc.vec"
    path_dict = "data/ukus/dict_similar.txt"
    path_dict_dis = "data/ukus/dict_dissimilar.txt"

    normalized = False

    wv1 = WordVectors(input_file=path_uk, normalized=normalized)
    wv2 = WordVectors(input_file=path_us, normalized=normalized)

    wv_uk, wv_us = intersection(wv1, wv2)

    # Load dictionaries of words
    # "similar" lines hold two space-separated entries (UK form, US form).
    with open(path_dict) as fin:
        dico_sim = [line.strip().split(" ", 1) for line in fin]

    # "dissimilar" lines hold one word, paired with itself.
    with open(path_dict_dis) as fin:
        dico_dis = [(line.strip(), line.strip()) for line in fin]

    # Filter words not in the vocabulary of either UK or US corpora
    dico_sim = [(a, b) for a, b in dico_sim
                if a in wv_uk.word_id and b in wv_us.word_id]
    dico_dis = [(a, b) for a, b in dico_dis
                if a in wv_uk.word_id and b in wv_us.word_id]
    dico = dico_sim + dico_dis
    # Create true labels for terms
    # 0 -> similar | 1 -> dissimilar
    y_true = [0] * len(dico_sim) + [1] * len(dico_dis)

    m = args.alignment
    # Align wordvectors (using any alignment approach)
    # NOTE(review): the noise-aware, global and s4 branches call align()
    # themselves and the unconditional align() after the chain runs again
    # with the final `landmarks`.  For "global" that means the result in use
    # is fitted on the first half of the vocabulary only.  The call sequence
    # is kept unchanged here -- confirm whether this is intended.
    if m == "noise-aware":
        Q, alpha, landmarks, noise = noise_aware(wv_uk.vectors, wv_us.vectors)
        landmarks = [wv_uk.words[i] for i in landmarks]
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
    elif m == "global":
        landmarks = wv_us.words
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
        landmarks = landmarks[:len(landmarks) // 2]
    elif m == "s4":
        landmarks = wv_us.words
        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
        landmarks, non_landmarks, Q = s4(
            wv_uk,
            wv_us,
            cls_model="nn",
            verbose=0,
            iters=100,
            n_targets=100,
            n_negatives=10,
            rate=0.25,
        )

        a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)
    elif m == "top-10":
        landmarks = wv_us.words[:int(len(wv_us.words) * 0.1)]
    elif m == "top-5":
        landmarks = wv_us.words[:int(len(wv_us.words) * 0.05)]
    elif m == "bot-10":
        landmarks = wv_us.words[-int(len(wv_us.words) * 0.1):]
    elif m == 'bot-5':
        landmarks = wv_us.words[-int(len(wv_us.words) * 0.05):]

    a_, b_, Q = align(wv_uk, wv_us, anchor_words=landmarks)

    # Apply the learned transform Q to the complete (non-intersected) UK
    # vocabulary.
    wv1_ = WordVectors(words=wv1.words, vectors=np.dot(wv1.vectors, Q))

    test_pairs = dico
    # print("Landmarks", len(landmarks))
    # Train classifier
    self_scores = list()
    cos_scores = list()
    na_scores = list()
    iters = 100

    # Interval to vary cosine thresholds
    cos_thresholds = [0.3, 0.5, 0.7]

    # Run several rounds, if given
    for r in range(args.rounds):
        model = s4(a_,
                   b_,
                   iters=iters,
                   landmarks=landmarks,
                   verbose=0,
                   n_targets=1000,
                   n_negatives=1000,
                   rate=0.25,
                   cls_model="nn",
                   update_landmarks=False)

        # Feature construction is no longer wrapped in a silent
        # `except KeyError: pass`: swallowing the error only postponed the
        # failure to a NameError (first round) or scored stale features
        # from the previous round.
        x = np.array(
            [np.concatenate((wv1_[p[0]], wv2[p[1]])) for p in test_pairs])
        x_cos = np.array(
            [cosine(wv1_[p[0]], wv2[p[1]]) for p in test_pairs])

        # Predict with noise-aware
        # Generate pairs (u, v) and apply noise-aware
        # 0 if pair is clean, 1 if pair is noisy
        v_a = np.array([wv1_[p[0]] for p in test_pairs])
        v_b = np.array([wv2[p[1]] for p in test_pairs])
        Q, alpha, clean, noisy = noise_aware(v_a, v_b)

        y_pred_na = np.zeros((len(test_pairs)))
        for i in noisy:
            y_pred_na[i] = 1

        y_hat = model.predict(x)
        y_pred = (y_hat > 0.5)

        self_acc = accuracy_score(y_true, y_pred)
        self_prec = precision_score(y_true, y_pred)
        self_rec = recall_score(y_true, y_pred)
        self_f1 = f1_score(y_true, y_pred)
        self_scores.append([self_acc, self_prec, self_rec, self_f1])

        # Cosine metrics
        # One row per threshold is appended each round, so rows are stored
        # round-major: row index = round * len(cos_thresholds) + threshold.
        for t in cos_thresholds:
            y_pred_cos = (x_cos > t)
            cos_acc = round(accuracy_score(y_true, y_pred_cos), 2)
            cos_prec = round(precision_score(y_true, y_pred_cos), 2)
            cos_rec = round(recall_score(y_true, y_pred_cos), 2)
            cos_f1 = round(f1_score(y_true, y_pred_cos), 2)

            cos_scores.append([cos_acc, cos_prec, cos_rec, cos_f1])

        # Noise-Aware metrics
        na_acc = round(accuracy_score(y_true, y_pred_na), 2)
        na_prec = round(precision_score(y_true, y_pred_na), 2)
        na_rec = round(recall_score(y_true, y_pred_na), 2)
        na_f1 = round(f1_score(y_true, y_pred_na), 2)
        na_scores.append([na_acc, na_prec, na_rec, na_f1])

    self_scores = np.array(self_scores)
    cos_scores = np.array(cos_scores)
    na_scores = np.array(na_scores)

    # Print Markdown Table
    n_thresholds = len(cos_thresholds)
    for j, t in enumerate(cos_thresholds):
        print("|COS %.2f" % t, m, sep="|", end="|")
        for i in range(4):
            # Rows j, j + n_thresholds, ... belong to threshold j.  The old
            # slice [j:, i] averaged over rows of the other thresholds too.
            print("%.2f" % (round(cos_scores[j::n_thresholds, i].mean(), 2)),
                  end="|",
                  sep=" ")
        print("|")
    print("|")
    print("|S4-D", m, end="|", sep="|")
    for i in range(4):
        print("%.2f +- %.2f" % (round(self_scores[:, i].mean(),
                                      2), round(self_scores[:, i].std(), 2)),
              end="|",
              sep=" ")
    print("|")
    # Only the first round's noise-aware scores are reported.
    print("|Noisy-Pairs", "-", *na_scores[0], sep="|", end="|\n")