def _score_samples_v1(alignment_method, samples, threshold):
    """Run ``predict_v1`` on every sample and threshold the scores.

    Args:
        alignment_method: Trained object exposing ``predict_v1(words)``,
            which returns a ``(score, mask)`` pair.
        samples: Iterable of word sequences to score.
        threshold: Scores ``>= threshold`` are labelled ``1.0``, all others
            ``0.0``.

    Returns:
        ``(labels, scores, masks)`` as three parallel lists.
    """
    scores = []
    masks = []
    for words in samples:
        score, mask = alignment_method.predict_v1(words)
        scores.append(score)
        masks.append(mask)
    labels = [float(score >= threshold) for score in scores]
    return labels, scores, masks


def alignment_v1(w2v, threshold, training_positive_annots,
                 training_negative_annots, test_positive_annots,
                 test_negative_annots):
    """Train the v1 alignment model and evaluate it on train and test data.

    An ``alignment.Alignment`` built from ``w2v`` is trained with
    ``train_v1`` on the training words and masks, then every training and
    test sample is scored with ``predict_v1``. A sample is predicted
    positive (``1.0``) when its score is ``>= threshold``.

    Args:
        w2v: Passed unchanged to ``alignment.Alignment``.
        threshold: Decision threshold applied to the predicted scores.
        training_positive_annots: Positive training annotations; each entry
            must provide ``"words"`` and ``"mask"``.
        training_negative_annots: Negative training annotations; each entry
            must provide ``"words"`` and ``"mask"``.
        test_positive_annots: Positive test annotations; only ``"words"`` is
            read.
        test_negative_annots: Negative test annotations; only ``"words"`` is
            read.

    Returns:
        A tuple of eight NumPy arrays, in this order: predicted training
        labels, predicted test labels, true training labels, true test
        labels, training scores, training masks, test scores, test masks.
        Within each array the positive samples come first, followed by the
        negative samples.
    """
    train_positive_words = [
        entry["words"] for entry in training_positive_annots
    ]
    train_negative_words = [
        entry["words"] for entry in training_negative_annots
    ]
    test_positive_words = [entry["words"] for entry in test_positive_annots]
    test_negative_words = [entry["words"] for entry in test_negative_annots]

    # Only the training masks are consumed (by train_v1); the test masks were
    # never used, so they are no longer extracted.
    train_positive_masks = [
        entry["mask"] for entry in training_positive_annots
    ]
    train_negative_masks = [
        entry["mask"] for entry in training_negative_annots
    ]

    alignment_method = alignment.Alignment(w2v)
    alignment_method.train_v1(train_positive_words, train_positive_masks,
                              train_negative_words, train_negative_masks)

    # Ground truth: 1.0 for each positive sample, then 0.0 for each negative.
    train_labels = ([1.0] * len(train_positive_words) +
                    [0.0] * len(train_negative_words))
    test_labels = ([1.0] * len(test_positive_words) +
                   [0.0] * len(test_negative_words))

    (training_pred_labels, training_pred_scores,
     training_pred_masks) = _score_samples_v1(
         alignment_method, train_positive_words + train_negative_words,
         threshold)
    (testing_pred_labels, testing_pred_scores,
     testing_pred_masks) = _score_samples_v1(
         alignment_method, test_positive_words + test_negative_words,
         threshold)

    return (np.array(training_pred_labels), np.array(testing_pred_labels),
            np.array(train_labels), np.array(test_labels),
            np.array(training_pred_scores), np.array(training_pred_masks),
            np.array(testing_pred_scores), np.array(testing_pred_masks))
def _classify_samples_v2(alignment_method, samples):
    """Run ``predict_v2`` on every sample and pick the winning side.

    ``predict_v2(words)`` returns ``(pos_score, pos_mask, neg_score,
    neg_mask)``. When ``pos_score`` is strictly greater than ``neg_score``
    the sample is labelled ``1.0`` with ``pos_score`` and ``pos_mask``;
    otherwise (ties included) it is labelled ``0.0`` with score
    ``1 - neg_score`` and ``neg_mask``.

    Args:
        alignment_method: Trained object exposing ``predict_v2(words)``.
        samples: Iterable of word sequences to classify.

    Returns:
        ``(labels, scores, masks)`` as three parallel lists.
    """
    labels = []
    scores = []
    masks = []
    for words in samples:
        pos_score, pos_mask, neg_score, neg_mask = (
            alignment_method.predict_v2(words))
        if pos_score > neg_score:
            score = pos_score
            mask = pos_mask
            label = 1.0
        else:
            score = 1 - neg_score
            mask = neg_mask
            label = 0.0
        labels.append(label)
        scores.append(score)
        masks.append(mask)
    return labels, scores, masks


def alignment_v2(w2v, training_positive_annots, training_negative_annots,
                 test_positive_annots, test_negative_annots):
    """Train the v2 alignment model and evaluate it on train and test data.

    An ``alignment.Alignment`` built from ``w2v`` is trained with
    ``train_v2`` on the training words and masks, then every training and
    test sample is classified by comparing the positive and negative scores
    returned by ``predict_v2``.

    Args:
        w2v: Passed unchanged to ``alignment.Alignment``.
        training_positive_annots: Positive training annotations; each entry
            must provide ``"words"`` and ``"mask"``.
        training_negative_annots: Negative training annotations; each entry
            must provide ``"words"`` and ``"mask"``.
        test_positive_annots: Positive test annotations; only ``"words"`` is
            read.
        test_negative_annots: Negative test annotations; only ``"words"`` is
            read.

    Returns:
        A tuple of eight NumPy arrays, in this order: predicted training
        labels, predicted test labels, true training labels, true test
        labels, training scores, training masks, test scores, test masks.
        Within each array the positive samples come first, followed by the
        negative samples.
    """
    train_positive_words = [
        entry["words"] for entry in training_positive_annots
    ]
    train_negative_words = [
        entry["words"] for entry in training_negative_annots
    ]
    test_positive_words = [entry["words"] for entry in test_positive_annots]
    test_negative_words = [entry["words"] for entry in test_negative_annots]

    # Only the training masks are consumed (by train_v2); the test masks were
    # never used, so they are no longer extracted.
    train_positive_masks = [
        entry["mask"] for entry in training_positive_annots
    ]
    train_negative_masks = [
        entry["mask"] for entry in training_negative_annots
    ]

    alignment_method = alignment.Alignment(w2v)
    alignment_method.train_v2(train_positive_words, train_positive_masks,
                              train_negative_words, train_negative_masks)

    # Ground truth: 1.0 for each positive sample, then 0.0 for each negative.
    train_labels = ([1.0] * len(train_positive_words) +
                    [0.0] * len(train_negative_words))
    test_labels = ([1.0] * len(test_positive_words) +
                   [0.0] * len(test_negative_words))

    (training_pred_labels, training_pred_scores,
     training_pred_masks) = _classify_samples_v2(
         alignment_method, train_positive_words + train_negative_words)
    (testing_pred_labels, testing_pred_scores,
     testing_pred_masks) = _classify_samples_v2(
         alignment_method, test_positive_words + test_negative_words)

    return (np.array(training_pred_labels), np.array(testing_pred_labels),
            np.array(train_labels), np.array(test_labels),
            np.array(training_pred_scores), np.array(training_pred_masks),
            np.array(testing_pred_scores), np.array(testing_pred_masks))
예제 #3
0
def parseGumby(gumbyFile, exonFile, baseSeq):
    """Parse a Gumby output file into a list of gumby blocks.

    Per the original author's note, the result excludes blocks that overlap
    exons and blocks that consist only of gaps on ``baseSeq``; that filtering
    is delegated to ``procData``, which receives the exon intervals collected
    here.

    Args:
        gumbyFile: Path of the Gumby output file.
        exonFile: Path of a whitespace-separated annotation file, or ``None``
            to skip exon loading. Only rows whose first field, lower-cased,
            equals ``baseSeq`` are used; their 4th and 5th fields are read as
            the integer bounds of an exon.
        baseSeq: Name of the base sequence.

    Returns:
        An ``alignment.Alignment`` extended with the ``procData`` output of
        every block. It is empty when the file contains no block header.
    """
    exons = []
    if exonFile is not None:
        with open(exonFile, "r") as fh:
            for line in fh:
                fs = line.split()
                # Skip blank lines and rows that belong to other sequences.
                if not fs or fs[0].lower() != baseSeq:
                    continue
                exons.append([int(fs[3]), int(fs[4])])

    # Sequence rows look like "<name> <start> <end> <sequence chunk>".
    re1 = compile("[a-z]+[ ]+[0-9]+[ ]+[0-9]+")
    seqs = {}
    pos = {}
    i = -1  # index of the current block; -1 until the first header is seen
    pval = length = score = None

    resultLst = alignment.Alignment()
    with open(gumbyFile, "r") as infile:
        for line in infile:
            line = line.strip()
            # Normalise the marker characters '*', '<' and '>' to gaps.
            line = line.replace("*", "-")
            line = line.replace("<", "-")
            line = line.replace(">", "-")
            if line.startswith("start"):
                # A header opens a new block: flush the previous one first.
                if i != -1:
                    resultLst.extend(
                        procData(baseSeq, exons, i, seqs, pos, pval, length,
                                 score))
                f = line.split()
                pval = float(f[-1])
                length = int(f[6].strip(","))
                score = int(f[8].strip(","))
                i += 1
                # NOTE(review): seqs is reset per block but pos is not, so
                # pos can carry entries over from earlier blocks -- confirm
                # this is intended.
                seqs = {}

            if re1.match(line):
                f = line.split()
                name = f[0]
                start = int(f[1]) - 1
                end = int(f[2]) - 1

                seq = f[3]
                if name not in seqs:
                    faseq = Fasta.FastaSeq(name, seq)
                    faseq.chrom = name
                    faseq.start = start
                    faseq.end = end
                    seqs[name] = faseq
                else:
                    # Later rows for the same name extend its sequence.
                    faseq = seqs[name]
                    faseq.nucl += seq
                pos[name] = (name, start, end)

    # Flush the last block. Without any header there is nothing to flush (and
    # pval/length/score would be undefined), so the result stays empty.
    if i != -1:
        resultLst.extend(
            procData(baseSeq, exons, i, seqs, pos, pval, length, score))
    return resultLst
예제 #4
0
 def get_best_alignment(self):
     """Trace back from the end cell and return the best alignment.

     The score is the value of the end cell. When it is greater than
     ``-INFINITY`` the parent links are followed back to the begin state,
     collecting each state's short name, and the collected path is reversed
     into forward order. Otherwise the state path is ``None``.

     Returns:
         ``alignment.Alignment(self.sequence, state_path, score)``.
     """
     path = seq.Sequence("State path", "")
     end_cell = self.get_end_cell()
     score = end_cell.value

     # No reachable end state: report the score without a path.
     if not (score > -INFINITY):
         return alignment.Alignment(self.sequence, None, score)

     cell = end_cell.parent
     while not cell.state.is_begin():
         path.append(cell.state.short_name)
         cell = cell.parent
     path.reverse()
     return alignment.Alignment(self.sequence, path, score)
예제 #5
0
def sample(hmm, observations):
    """
    Draw at most ``observations`` samples from the given HMM.

    Starting from the HMM's begin state, a transition is sampled on each
    step; sampling stops early as soon as an end state is reached. For every
    other state its short name is appended to the state path and one sampled
    emission is appended to the emission sequence.

    Returns ``alignment.Alignment(emission_sequence, state_path)``.
    """
    random.seed()  # force reseeding

    path = seq.Sequence("State path", "")
    emitted = seq.Sequence("Sequence", "")

    state = hmm.begin_state()
    for _ in range(observations):
        state = state.sample_transition()
        if state.is_end():
            break
        path.append(state.short_name)
        emitted.append(state.sample_emission())

    return alignment.Alignment(emitted, path)
예제 #6
0
def using_constraint_solver(original_csv, output_csv, tempo=120, threshold=7000, offset=0.00, leniency=0.3, num_agents=2):
    """Return the fraction of positions where the output differs from the original.

    Both CSV files are turned into note lists, converted with
    ``mt.number_converter`` and solved with ``cs.get_solutions``. The leading
    run of ``-1`` entries is removed from every output solution, the first
    original and output solutions are aligned with ``al.Hirschberg.align``
    and printed, and finally the solutions are compared position by position.

    Args:
        original_csv: Input for the reference note list.
        output_csv: Input for the note list under test.
        tempo: Forwarded to ``mt.create_note_list`` for both files.
        threshold: Forwarded to ``mt.create_note_list`` for the original file
            only; the output file always uses ``threshold=500``.
        offset: NOTE(review): currently ignored; both calls hard-code
            ``offset=-0.08``. Kept for interface compatibility.
        leniency: NOTE(review): currently unused.
        num_agents: Forwarded to ``cs.get_solutions``.

    Returns:
        Number of mismatching positions divided by
        ``len(original_solutions) * len(original_solutions[0])``, or ``-1``
        when the first original solution is empty. A position that is missing
        from a (shorter) output solution counts as a mismatch.
    """
    original_freq_list = mt.create_note_list(original_csv, tempo, threshold, offset=-0.08)
    original_freq_list = mt.number_converter(original_freq_list)
    original_solutions = cs.get_solutions(original_freq_list, num_agents)
    print("Original :", original_solutions[0])

    # May need to change threshold for output freq list
    output_freq_list = mt.create_note_list(output_csv, tempo, threshold=500, offset=-0.08)
    output_freq_list = mt.number_converter(output_freq_list)
    output_solutions = cs.get_solutions(output_freq_list, num_agents)
    # Drop the leading run of -1 entries from each agent's output solution.
    for agent, output_solution in enumerate(output_solutions):
        index = 0
        while index < len(output_solution) and output_solution[index] == -1:
            index += 1
        output_solutions[agent] = output_solution[index:]

    print("Output:", output_solutions[0])
    print("Test: ")
    tester = al.Alignment()
    a, b = al.Hirschberg.align(tester, seq_a=list(original_solutions[0]), seq_b=list(output_solutions[0]))
    print(a)
    print(b)

    num_errors = 0.0
    positions_per_agent = len(original_solutions[0])
    for agent, original_solution in enumerate(original_solutions):
        output_solution = output_solutions[agent]
        for j in range(positions_per_agent):
            # The trimmed output may be shorter than the original; a missing
            # position is an error rather than an IndexError.
            if j >= len(output_solution) or original_solution[j] != output_solution[j]:
                num_errors += 1

    if original_solutions and original_solutions[0]:
        return num_errors / float(len(original_solutions) * positions_per_agent)
    else:
        return -1
예제 #7
0
    def __init__(self, file_path):
        """Set up a speech object from its corpus-relative file path.

        The path is first normalised with
        ``file_loader.format_file_path_for_speech_object``. The alignment,
        transcript, audio and applause-times file names are then derived from
        it, the stored applause predictions and crowd RMSE are loaded, and
        the alignment, per-second applause predictions, applause list and
        phrase audio features are built, in that order. The audio constants
        (``frame_rate``, ``hop_size``, ``sr``) are assigned last.
        """
        file_path = file_loader.format_file_path_for_speech_object(file_path)

        alignments_dir = "/data/corpora/cspan/alignments/"
        transcripts_dir = "/data/corpora/cspan/transcripts_clean/"
        audio_dir = "/data/corpora/cspan/audio/"
        applause_dir = "/data/corpora/cspan/applause_times/"

        self.file_path = file_path

        # The alignment JSON is named after the second component of the path.
        speech_name = file_path.split('/')[1]
        self.alignment_file = (alignments_dir + file_path + "/" + speech_name
                               + "_single_speaker.json")
        self.transcript_file = transcripts_dir + file_path + ".txt"
        self.audio_file = audio_dir + file_path + ".mp3"
        self.applause_times_file = applause_dir + file_path + ".txt"

        self.load_stored_applause_predictions(file_path)
        self.load_stored_crowd_rmse()

        self.alignment = alignment.Alignment(self.alignment_file)
        # Back-reference so the alignment can reach this speech object.
        self.alignment.speech = self

        self.applause_preds_by_second = self.get_preds_by_second()
        self.applause_list = applause_list.ApplauseList(
            self.applause_times_file)
        self.phrase_audio_features = self.get_phrase_audio_features()

        self.frame_rate = 43.06640625
        self.hop_size = 512
        self.sr = 22050
예제 #8
0
# Title prefix of the student recording. teacherTitle, segmentFileFolder and
# outputFileFolder are used below but are defined outside this excerpt.
studentTitle = 'weiguojia_section_amateur'

# Input files: mono-note MIDI output for teacher and student.
teacherMonoNoteOutFilename = segmentFileFolder + teacherTitle + '_monoNoteOut_midi.txt'
studentMonoNoteOutFilename = segmentFileFolder + studentTitle + '_monoNoteOut_midi.txt'

# Input files: JSON representations for teacher and student.
teacherRepresentationFilename = segmentFileFolder + teacherTitle + '_representation.json'
studentRepresentationFilename = segmentFileFolder + studentTitle + '_representation.json'

# Output files: note-level alignment results.
teacherNoteAlignedFilename = outputFileFolder + teacherTitle + '_noteAligned.csv'
studentNoteAlignedFilename = outputFileFolder + studentTitle + '_noteAligned.csv'

# Output files: segment-level alignment results.
teacherSegAlignedFilename = outputFileFolder + teacherTitle + '_segAligned.csv'
studentSegAlignedFilename = outputFileFolder + studentTitle + '_segAligned.csv'

# Helper objects shared by the alignment steps below.
cs1 = cs.ConcatenateSegment()
align1 = align.Alignment()

#################################################### note alignment ####################################################

# read note file
# Each call yields three values per recording, bound here to the note
# starting times, note durations and MIDI notes (suffix _t = teacher,
# _s = student).
noteStartingTime_t, noteDurTime_t, midiNote_t = cs1.readPyinMonoNoteOutMidi(
    teacherMonoNoteOutFilename)
noteStartingTime_s, noteDurTime_s, midiNote_s = cs1.readPyinMonoNoteOutMidi(
    studentMonoNoteOutFilename)
# Derive the starting/ending frame of every note from its starting time and
# duration.
noteStartingFrame_t, noteEndingFrame_t = cs1.getNoteFrameBoundary(
    noteStartingTime_t, noteDurTime_t)
noteStartingFrame_s, noteEndingFrame_s = cs1.getNoteFrameBoundary(
    noteStartingTime_s, noteDurTime_s)
# get concatenated pitch track
notePts_t, noteStartingFrameConcatenate_t, noteEndingFrameConcatenate_t = \
예제 #9
0
from xml_handling import UniprotXMLHandler
from ete3 import NCBITaxa
import db_handling
import alignment
import operator
import numpy as np
import os
import sys

#Accession.download_uniprot_xml()

# uniprotHandler = UniprotXMLHandler(iterate=True)

# Handles used by the filtering loop below.
ncbi = NCBITaxa()
align = alignment.Alignment()
protDB = db_handling.ProteinDatabase()

filtering_states = protDB.get_filtering_states()

for state in filtering_states:
    # The current state is re-read on every pass; the body below may move the
    # database on to its next filtering state.
    current_state = protDB.get_current_state()
    rank = state[0]
    # Act only when this entry's rank is the database's current rank, field 2
    # of the current state is truthy and field 4 is falsy.
    if rank == current_state[0] and current_state[2] and not current_state[4]:
        align.iterate_ellection_taxon_rank_representatives(
            taxon_rank=rank, current_state=current_state)

        protDB.set_next_filtering_state()
예제 #10
0
# Remaining command-line options; the parser itself (and the --dir option
# read below as args.dir) is created outside this excerpt.
parser.add_argument('--num', type=int, default=1000, help='num of images')
parser.add_argument('--dim', type=int, default=96, help='alignment dimension')
args = parser.parse_args()

print(args)

# The input video is always '001.mp4' inside the directory given by args.dir.
videoCapture = cv2.VideoCapture(os.path.join(args.dir, '001.mp4'))

if not videoCapture.isOpened(): sys.exit('video not opened')

# Load the landmark template and triangulate it; the triangle index list
# (delaunay.simplices) is handed to the aligner below.
template = np.load(os.path.join(fileDir, 'template.npy'))
delaunay = scipy.spatial.Delaunay(template)

facePredictor = os.path.join(fileDir, 'shape_predictor_68_face_landmarks.dat')
alignDlib = openface.AlignDlib(facePredictor)
# NOTE(review): this rebinds the name `alignment` from the imported module to
# the Alignment instance, so the module is no longer reachable by that name
# after this line.
alignment = alignment.Alignment(args.dim, template, delaunay.simplices)

print('processing images...')

# Process at most args.num frames; stop early when the video runs out.
for index in range(args.num):

    ret, rawImage = videoCapture.read()

    if not ret: break
    
    # NOTE(review): boundingBox is passed on without any check -- confirm
    # whether getLargestFaceBoundingBox can return None when no face is found.
    boundingBox = alignDlib.getLargestFaceBoundingBox(rawImage)
    landmarks = alignDlib.findLandmarks(rawImage, boundingBox)

    alignedImage = alignment.align(rawImage, landmarks)

    # Convert the aligned frame to a single-channel grayscale image.
    convertedImage = cv2.cvtColor(alignedImage, cv2.COLOR_RGB2GRAY)
def print_alignment(hyp, ref):
    a = alignment.Alignment()
    a.align(hyp, ref)  #input is string!!!
    print "Formatted hypothesis: ", a.hstring
    print "Formatted reference: ", a.rstring
    print "Substitution: ", a.numSub, "Insertion: ", a.numIns, "Deletion: ", a.numDel
예제 #12
0
# Record the configured line width in the log and open the MSSA output file.
# LOGFILE, width, mssaFile, INFILE, CHATTY and format are defined outside
# this excerpt.
LOGFILE.write("%s%s\n" % ("Line width is ", width))
MSSAFILE = open(mssaFile, "w")

# Set up data structure for holding alignments
pairwise = {  # dict holding next pairwise alignment 
    "matchName": "",  # name of sequence aligned to reference
    "referenceLine":
    "",  # reference sequence (complete) with possible gaps as '-'
    "correspondenceLine": "",  # string of characters: blank, '.', or ':'
    "matchLine":
    "",  # match sequence (maybe incomplete) with possible gaps as '-'
}
failure = 0  # will be test result from method call

# Create an Alignment object
myAlignment = alignment.Alignment(format)

if CHATTY:
    print "Acquiring R-R correspondences from input file..."

# Switches that control handling of input file data lines
FASTA = False
ALIGN = False

# Read the whole input file into memory, one list entry per line.
fLines = INFILE.read().splitlines()
lineCount = len(fLines)

for i in xrange(0, lineCount):
    nextLine = fLines[i]  # Get next data line
    # NOTE(review): '\^#' is compared as a literal string (backslash, caret,
    # hash), not as a regular expression, so lines that merely start with '#'
    # are NOT skipped here -- confirm whether nextLine.startswith('#') was
    # intended before changing it, since later code may rely on '#' lines.
    if (nextLine == '' or nextLine == '\^#'):  # blank line, or a line that is exactly '\^#'
        continue  # skip blank lines (and the literal '\^#' line)
예제 #13
0
파일: extractor.py 프로젝트: jungikim/sbmt
                if etree is None:
                    log.write("bad tree, skipped line\n")
                    continue

                #espans = label_spans(etree)
                eleaves = list(etree.frontier())
                ewords = [leaf.label for leaf in eleaves]
                etags = [leaf.parent.label for leaf in eleaves]
                ecb = crossing_brackets(etree)
            except:
                log.write("bad etree, skipped: %s\n" % estr)
                continue
        else:
            ewords = estr.split()
            
        a = alignment.Alignment(fwords, ewords)
        good = True
        if astr.strip() == "":
            log.write("skipping sentence with empty alignment\n")
            continue
        for ij in astr.split():
            i,j = (int(x) for x in ij.split('-',1))
            try:
                a.align(i,j)
            except IndexError:
                log.write("alignment point %s-%s out of bounds\n" % (i,j))
                good = False
                break
        if not good:
            log.write("french:    %s\n" % " ".join(fwords))
            log.write("english:   %s\n" % " ".join(ewords))