Example No. 1
 def predict(self, inpseq, useCascade = True):
     """ Classify each symbol in a sequence.
         Return the predictions as a list of symbols. """
     W = self.nn1.ninput // len(self.inp_alpha)
     if useCascade and self.cascade:
         nn1seq = self.predict(inpseq, useCascade = False)
         subseqs = slidewin(nn1seq, self.cascade)
         predsyms = ['C' for _ in range(len(inpseq))] # use coil for positions in flanking regions
         for i in range(len(subseqs)):    # for each input sub-sequence of the primary NN
             input = numpy.zeros(self.cascade * len(self.outp_alpha))
             input[_onehotIndex(self.outp_alpha, subseqs[i])] = 1
             outvec = self.nn2.feedforward(input)
             d = prob.Distrib(self.outp_alpha)
             for k in range(len(outvec)):
                 d.observe(self.outp_alpha[k], outvec[k])
             predsyms[i + self.cascade // 2] = d.getmax()    # use the symbol with the highest probability
         return sequence.Sequence(predsyms, self.outp_alpha)
     else: # only predict using the first NN
         subseqs = slidewin(inpseq, W)
         predsyms = ['C' for _ in range(len(inpseq))] # use coil for positions in flanking regions
         for i in range(len(subseqs)):    # for each input sub-sequence of the primary NN
             input = numpy.zeros(self.inp_len * len(self.inp_alpha))
             input[_onehotIndex(self.inp_alpha, subseqs[i])] = 1
             outvec = self.nn1.feedforward(input)
             d = prob.Distrib(self.outp_alpha)
             for k in range(len(outvec)):
                 d.observe(self.outp_alpha[k], outvec[k])
             predsyms[i + W // 2] = d.getmax()    # use the symbol with the highest probability
         return sequence.Sequence(predsyms, self.outp_alpha)
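A minimal usage sketch for the method above (hypothetical: `model` and `inpseq` are assumed to exist and are not part of the example):

# Assumes `model` is a trained instance of the class defining predict(),
# and `inpseq` is a sequence.Sequence over model.inp_alpha.
pred = model.predict(inpseq, useCascade=True)  # sequence.Sequence of predicted symbols (e.g. 'C' for coil)
print(''.join(pred))  # assumes each predicted symbol is a single-character string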
Example No. 2
    def sort_sequence(self, files):
        #print ''

        res = []

        currentSeq = sequence.Sequence()
        for file_item in files:

            name = sequence.SeqString(file_item.basename())

            sequenceSplit = False

            if not currentSeq.match(name, self.numPos):
                sequenceSplit = True

            if sequenceSplit:
                res.append(currentSeq)
                currentSeq = sequence.Sequence()

            currentSeq.append(name, file_item)

        if len(currentSeq) > 0:
            res.append(currentSeq)

        content_list = []

        for item in res:

            if len(item) <= 1:
                content_list.append(item[0])

            else:
                f = item[0]

                if isinstance(f, content_types.ImageFile):
                    c = content_types.ImageSequence
                else:
                    c = content_types.FileSequence

                content_item = c(dirname=f.dirname(),
                                 sequence_object=item,
                                 mimetype=f.mimetype())

                content_item.set_common_prefix(f.common_prefix())

                #print content_item[0]
                #for item in content_item:
                #print item
                #print content_item

                #print item.ranges()
                #print item.sequenceName()

                content_list.append(content_item)

        return content_list
Example No. 3
 def _backwardParsimony(self, aln, seq=None):
     """ Internal function that operates recursively to inspect scores to determine
         most parsimonious sequence, from root to leaves. """
     if self.sequence is None:  # no sequence has been assigned
         leftbuf = []
         rightbuf = []
         if self.left is None and self.right is None:  # no children, so terminal, cannot propagate scores
             raise RuntimeError("No sequence assigned to leaf node:",
                                self.label)
         if seq is None:  # Only root can do this, no parents to consider, so we pick the lowest scoring symbol
             currbuf = []
             for col in range(aln.alignlen):
                 min_score = 999999
                 min_symb = None
                 left_symb = None
                 right_symb = None
                 for a_parent in range(len(aln.alphabet)):
                     if self.seqscores[col][a_parent] < min_score:
                         min_score = self.seqscores[col][a_parent]
                         min_symb = a_parent
                         left_symb = self.backleft[col][a_parent]
                         right_symb = self.backright[col][a_parent]
                 currbuf.append(aln.alphabet[min_symb])
                 leftbuf.append(aln.alphabet[left_symb])
                 rightbuf.append(aln.alphabet[right_symb])
             self.sequence = sequence.Sequence(currbuf,
                                               aln.alphabet,
                                               self.label,
                                               gappy=True)
         else:  # Non-root, but not leaf
             self.sequence = seq
             col = 0
             for sym_parent in self.sequence:
                 a_parent = aln.alphabet.index(sym_parent)
                 left_symb = self.backleft[col][a_parent]
                 right_symb = self.backright[col][a_parent]
                 leftbuf.append(aln.alphabet[left_symb])
                 rightbuf.append(aln.alphabet[right_symb])
                 col += 1
         self.left._backwardParsimony(
             aln,
             sequence.Sequence(leftbuf,
                               aln.alphabet,
                               self.label,
                               gappy=True))
         self.right._backwardParsimony(
             aln,
             sequence.Sequence(rightbuf,
                               aln.alphabet,
                               self.label,
                               gappy=True))
     return self.sequence
Example No. 4
def read(args):

    outputfile = output(args)

    orig_dict = {}

    if '.csv' in args.input:
        print("this is a CSV file")
        outputfile = outputfile + '.fa'
        with open(args.input, newline='') as f:
            reader = csv.reader(f)
            for row in reader:
                orig_dict[row[0]] = row[1]
        seq_list = [
            sequence.Sequence(sequence=seq, name=seqname)
            for seqname, seq in orig_dict.items()
        ]
        sequence.writeFastaFile(outputfile, seq_list)

    elif '.tab' in args.input or '.tsv' in args.input:
        print("this is a TAB/TSV file")
        outputfile = outputfile + '.fa'
        with open(args.input) as tsv:
            for line in csv.reader(tsv, dialect="excel-tab"):
                orig_dict[line[0]] = line[1]

        seq_list = [
            sequence.Sequence(sequence=seq, name=seqname)
            for seqname, seq in orig_dict.items()
        ]
        sequence.writeFastaFile(outputfile, seq_list)

    elif '.fa' in args.input or '.fasta' in args.input:
        print("this is a FASTA file")
        outputfile = outputfile + '.csv'
        db100 = sequence.readFastaFile(args.input,
                                       sequence.Protein_Alphabet,
                                       ignore=True,
                                       parse_defline=False)

        with open(outputfile, 'w', newline='') as f:
            fieldnames = ['Name', 'Sequence']
            thewriter = csv.DictWriter(f, fieldnames=fieldnames)

            thewriter.writeheader()
            for seq in db100:
                s = ''.join(seq.sequence)
                thewriter.writerow({'Name': seq.name, 'Sequence': s})
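A hedged usage sketch (hypothetical: the argparse namespace is built by hand, and the helper output(args) used above is assumed to derive a base output filename from it):

import argparse

args = argparse.Namespace(input='proteins.csv', output='proteins_out')  # hypothetical file names
read(args)  # writes a FASTA file, assuming output(args) yields 'proteins_out'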
Example No. 5
def parse_sequences(raw_susceptible_file, raw_resistant_file):
    """
    Parses a raw sequence file into a Sequence object.

    Args:
        raw_sequence_file: the filename where sequences are stored.
        raw_drm_file: a file containing a comma-separated list of DRM 
        positions.

    Returns:
        A list of Sequence objects.
    """

    print 'Parsing sequences.'

    sequences = []
    for susceptible, resistant in \
        zip(SeqIO.parse(raw_susceptible_file, "fasta"), \
            SeqIO.parse(raw_resistant_file, "fasta")):
        
        sequences.append(sequence.Sequence(
                            susceptible, 
                            resistant
                        ))
        print '.',
        sys.stdout.flush()

    print
    return sequences
Example No. 6
 def add_sequence(self, x, y):
     '''Add a sequence to the list, where x is the sequence of
     observations, and y is the sequence of states.'''
     num_seqs = len(self.seq_list)
     x_ids = [self.x_dict.get_label_id(name) for name in x]
     y_ids = [self.y_dict.get_label_id(name) for name in y]
     self.seq_list.append(seq.Sequence(self, x_ids, y_ids, num_seqs))
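A short usage sketch (hypothetical: `reader` stands for an instance of the containing class, with its x_dict/y_dict label dictionaries already set up):

# Observations and states are mapped to integer ids via the label dictionaries.
reader.add_sequence(x=['walk', 'shop', 'clean'], y=['sunny', 'rainy', 'rainy'])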
Example No. 7
    def KL_MC(self, p2, nb=100, lg=1000):
        """Compute KL-distance with Monte Carlo at Proportion p2 on
nb=100 Sequence of length lg=1000."""

        if nb <= 0:
            print "Too few sequences"
            return

        if lg <= 1:
            print "Too short sequences"
            return

        lx = lexique.Lexique()
        lx[1] = self.loglex(1)
        lx[2] = p2.loglex(2)

        g = sequence.Sequence()

        v = 0.0
        for i in range(nb):
            g.read_prop(self, long=lg)
            lv = lx.ls_evalue(g)
            v += lv[1] - lv[2]

        v /= nb
        return v / (lg - 1)
Example No. 8
    def KL_MC(self, lp2, nb=100, lg=1000):
        """Compute Kullback-Leibler divergence to Lproportion lp2 with
Monte Carlo simulation on nb=100 Sequence of length lg=1000.
"""

        if nb <= 0:
            print "Too few sequences"
            return

        if lg <= 1:
            print "Too short sequences"
            return

        lx1 = lexique.Lexique()
        lx1.read_Lprop(self)
        lx2 = lexique.Lexique()
        lx2.read_Lprop(lp2)

        g = sequence.Sequence()
        p = partition.Partition()
        v = 0.0
        for i in range(nb):
            print i, '\r',
            sys.stdout.flush()
            g.read_Lprop(self, long=lg)
            p.viterbi(g, lx1)
            v += p.val()
            p.viterbi(g, lx2)
            v -= p.val()
        v /= nb
        return v / (lg - 1)
Example No. 9
def allMotifs_fa(args):
	# Check how many columns each CSV has and whether every motif column has a
	# value; keep only complete rows and write them out as a FASTA file.
	for i in range(len(args.input)):
		fasta = {}
		name = args.input[i].split('.')[0] + '_reduced.fa'

		with open(args.input[i], newline='') as f:
			reader = csv.reader(f)
			header = next(reader)

			for row in reader:
				# keep the row only if none of the motif columns is empty
				isEmpty = False
				for col in range(1, len(header) - 1):
					if row[col] == "":
						isEmpty = True
						break
				if not isEmpty:
					fasta[row[0]] = row[len(header) - 1]

		seq_list = [sequence.Sequence(sequence=seq, name=seqname) for seqname, seq in fasta.items()]
		sequence.writeFastaFile(name, seq_list)
		print(str(len(seq_list)) + " sequences kept after applying the requirements for " + name)
Example No. 10
 def _backwardParsimony(self, aln, seq=None):
     """ Internal function that operates recursively to inspect scores to determine
         most parsimonious sequence, from root to leaves. """
     if self.sequence is None:  # no sequence has been assigned
         childbuf = [[] for _ in range(self.nChildren())]
         if self.nChildren() == 0:  # no children, so terminal, cannot propagate scores
             raise RuntimeError("No sequence assigned to leaf node:",
                                self.label)
         if seq is None:  # Only root can do this, no parents to consider, so we pick the lowest scoring symbol
             currbuf = []
             for col in range(aln.alignlen):
                 min_score = 999999
                 min_symb = None
                 child_symb = [None for _ in range(self.nChildren())]
                 for a_parent in range(len(aln.alphabet)):
                     if self.seqscores[col][a_parent] < min_score:
                         min_score = self.seqscores[col][a_parent]
                         min_symb = a_parent
                         for i in range(self.nChildren()):
                             child_symb[i] = self.backptr[i][col][a_parent]
                 currbuf.append(aln.alphabet[min_symb])
                 for i in range(self.nChildren()):
                     childbuf[i].append(aln.alphabet[child_symb[i]])
             self.sequence = sequence.Sequence(currbuf,
                                               aln.alphabet,
                                               self.label,
                                               gappy=True)
         else:  # Non-root, but not leaf
             self.sequence = seq
             col = 0
             for sym_parent in self.sequence:
                 a_parent = aln.alphabet.index(sym_parent)
                 child_symb = [None for _ in range(self.nChildren())]
                 for i in range(self.nChildren()):
                     child_symb[i] = self.backptr[i][col][a_parent]
                     childbuf[i].append(aln.alphabet[child_symb[i]])
                 col += 1
         for i in range(self.nChildren()):
             self.children[i]._backwardParsimony(
                 aln,
                 sequence.Sequence(childbuf[i],
                                   aln.alphabet,
                                   self.label,
                                   gappy=True))
     return self.sequence
Example No. 11
def sample(hmm, observations):
    """
    Samples a finite number of times (observations) the given HMM. returns two sequences: State path and Emission sequence.
    """
    random.seed() # force reseeding

    state_path = seq.Sequence("State path", "")
    emission_sequence = seq.Sequence("Sequence", "")

    current_state = hmm.begin_state()
    for i in range(observations):
        current_state = current_state.sample_transition()
        if current_state.is_end():
            break
        state_path.append(current_state.short_name)
        emission_sequence.append(current_state.sample_emission())

    return alignment.Alignment(emission_sequence, state_path)
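A hedged usage sketch (hypothetical: `my_hmm` is assumed to be an HMM object exposing begin_state(), whose states provide the sample_transition()/sample_emission()/is_end() methods used above):

aln = sample(my_hmm, observations=100)  # alignment.Alignment pairing emissions with the hidden state path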
Example No. 12
def run(channels, output, attributes, canvas, mask):
    seq = sequence.Sequence(attributes)
    seq.append(channels, mask, canvas, output['num_frames'], 0)
    seq.loop_to_beginning(output['num_loop_frames'])
    img = lapnorm.generate(seq,
                           attributes,
                           output,
                           start_from=0,
                           preview=False)
    return img
Example No. 13
def dna_read_file(filename):
    gene = []
    with open(filename, 'r') as gene_a:  # load the pulses DNA file
        lines_1 = gene_a.readlines()
        for line in lines_1:
            parts = line.split(',')
            gene.append(parts[0])  # collect the first field of every line
    genome_sequence = sequence.Sequence(parts[0], 'ACGT', 'ACTT', "genome_01.dat")
    all_bases = genome_sequence.all_base()
    print(f'The total number of bases is: {all_bases}')
Example No. 14
    def read_sequences(self):
        with open(self.f) as sra:
            sra_lines = sra.readlines()

        for i in range(len(sra_lines)):
            if re.match(r'^>', sra_lines[i]):
                # header line: the third field holds "length=<n>"; the sequence is on the next line
                s_l = int(sra_lines[i].split()[2].split('=')[1])
                s = sra_lines[i + 1]
                seq = sequence.Sequence(s, s_l)
                self.sequences.append(seq)
Example No. 15
    def __init__(self, label, *args, **kwargs):
        self.label = label
        self.protein_name = kwargs.get('protein_name', self.label)
        self.class_name = kwargs.get('class_name', self.label)
        self.struct_type = kwargs.get('struct_type', 'single')

        self._load_universe()
        self._update()

        self.structure_sequence = sequence.Sequence(self.protein)
        self.domains = sequence.SequenceDomain(self.structure_sequence)
        self._translation_vectors = []
Example No. 16
 def get_best_alignment(self):
     state_path = seq.Sequence("State path", "")
     current_cell = self.get_end_cell()
     score = current_cell.value
     if score > -INFINITY:
         current_cell = current_cell.parent
         while not current_cell.state.is_begin():
             state_path.append(current_cell.state.short_name)
             current_cell = current_cell.parent
         state_path.reverse()
     else:
         state_path = None
     return alignment.Alignment(self.sequence, state_path, score)
Example No. 17
def posEqual_fa(args):
	fasta = {}
	name = (args.input[0])
	name = name.split('.')[0]
	csvFile = name + '_reduced.csv'
	seqCol = 0

	with open(csvFile, newline='') as f:
		reader = csv.reader(f)
		header = next(reader)
		seqCol = len(header)-1
		
		for row in reader:
			fasta[row[0]] = row[len(header)-1]

		seq_list = [sequence.Sequence(sequence=seq, name=seqname) for seqname, seq in fasta.items()]
		sequence.writeFastaFile(name+'.fa', seq_list)
Example No. 18
def generateSubsequence(sequence, itemK, itemList):
    string = getStringBetween(str(sequence), '<', '>')
    string = '{' + string[string.find(str(itemK)):len(string)]
    itemsetStrings = re.findall(REGEX_IS, string)
    # create sequence object
    sequenceObject = seq.Sequence()
    # create itemset objects and append to sequence object
    for itemset in itemsetStrings:
        tokens = itemset.split(',')
        intValues = [int(x) for x in tokens]
        # create a new ItemSet object
        temp = element.ItemSet()
        # add items and mis into itemset
        for x in intValues:
            temp.addItem(itemList.getItem(x))
        # add itemset to a sequence
        sequenceObject.addItemSet(temp)
    return sequenceObject
Example No. 19
 def call_samfile(self, samf):
     if type(samf) in (str, unicode):
         samf = Samfile(samf)
     for reference in samf.references:
         # stats
         self.coverage = {
             "max_coverage": None,
             "min_coverage": None,
             "avg_coverage": 0,
             "column_count": 0
         }
         self.call_type_hist = collections.defaultdict(int)
         #
         pileup = samf.pileup(reference=reference)
         seq = self.call_pileup(pileup)
         name = "%s_consensus" % reference
         if self.coverage["column_count"]:
             self.coverage["avg_coverage"] = self.coverage[
                 "avg_coverage"] / float(self.coverage["column_count"])
         yield sequence.Sequence(seq, name=name)
Example No. 20
def generate_sequence(nframes, npts, msm_noise=.02):
    np.random.seed(654)  # repeatability

    pt_radius = 1.
    R_pert = .02
    t_pert = .02

    # Generate points
    pts = (np.random.rand(npts, 3) * 2 - 1) * pt_radius

    # Generate cameras
    K = np.eye(3)
    R = np.eye(3)
    t = np.zeros(3)
    Rs = []
    ts = []
    measurements = []
    for i in range(nframes):
        Rs.append(R)
        ts.append(t)
        measurements.append(generate_measurements(K, R, t, pts, msm_noise))
        R = perturb_rotation(Rs[-1], R_pert)
        t = perturb_vector(ts[-1], t_pert)

    measurements = np.array(measurements)

    # Convert into tracks
    tracks = []
    for track_msms in np.transpose(measurements, (1, 0, 2)):
        tracks.append(sequence.Track(np.arange(nframes), track_msms))

    # Create the sequence
    seq = sequence.Sequence()
    seq.K = K
    seq.tracks = tracks
    seq.initial_Rs = Rs  # TODO: add noise
    seq.initial_ts = ts  # TODO: add noise
    seq.initial_xs = pts  # TODO: add noise

    return seq
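A usage sketch, under the assumption that the helpers referenced above (generate_measurements, perturb_rotation, perturb_vector) are available in the same module:

seq = generate_sequence(nframes=10, npts=50, msm_noise=0.02)
print(len(seq.tracks), len(seq.initial_Rs))  # one Track per point, one camera pose per frame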
Example No. 21
def parseDataFile(dataFileName, misValueDictionary):
    # database object
    database = db.Database()
    dataFile = open(dataFileName, 'r')
    try:
        # for each line ending with newline char
        for line in dataFile.readlines():
            # get sequence string
            sequenceString = getStringBetween(line, '<', '>')
            # get all itemset strings
            itemsetStrings = re.findall('{([^{]*)}', sequenceString)
            # create sequence object
            sequenceObject = seq.Sequence()
            # create itemset objects and append to sequence object
            for itemset in itemsetStrings:
                tokens = itemset.split(',')
                intValues = [int(x) for x in tokens]
                # create a new ItemSet object
                temp = element.ItemSet()
                # add items and mis into itemset
                for x in intValues:
                    temp.addItem(item.Item(x, misValueDictionary[x]))
                # sort items based on mis values
                temp.sortItemSet()
                # add itemset to a sequence
                sequenceObject.addItemSet(temp)
            # append sequence to database
            database.sequenceList.append(sequenceObject)
    # report I/O errors and exit
    except IOError as e:
        print(str(e))
        sys.exit(2)
    finally:
        dataFile.close()
    return database
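A small usage sketch (hypothetical file name and MIS values; the data format follows the '<{...}{...}>' convention parsed above):

# 'seqdata.txt' is assumed to contain lines such as: <{1,2}{3}>
mis_values = {1: 0.25, 2: 0.25, 3: 0.10}
database = parseDataFile('seqdata.txt', mis_values)
print(len(database.sequenceList))  # number of parsed sequences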
Example No. 22
 def add_named_sequence(self, name):
     seq = sequence.Sequence(self.profile, name)
     seq.create_default_tracks()
     self.sequences.append(seq)
     self.next_seq_number += 1
Example No. 23
test_element1.print_overview()

test_element2.print_overview()

#-------------------------continue-------------------------------
# -------------------------------------------------------
# # viewing of the sequence for second check of timing etc
viewer.show_element_stlab(test_element1, delay=False, channels='all', ax=None)
viewer.show_element_stlab(test_element2, delay=False, channels='all', ax=None)

#--------------------------------------------------------

#-------------------------------------------------------
# now to send everything to the AWG, we perform the last step by putting everything
# into a sequence
seq = sequence.Sequence(sequence_name)
seq.append(name='first_element',
           wfname=sequence_name + '_element1',
           trigger_wait=True)  #,
#            # goto_target='first_element')#, jump_target='first special element')

seq.append(name='second element',
           wfname=sequence_name + '_element1',
           trigger_wait=True)  #,
#            # goto_target='third element', jump_target='second special element')

AWG.program_awg(seq, test_element1, test_element2,
                verbose=True)  #, test_element2)

AWG.AWGrun()
Example No. 24
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--input",
                        help="FASTA file to query from",
                        required=True)
    parser.add_argument("-q",
                        "--query",
                        help="Query FASTA file",
                        required=True)
    parser.add_argument("-db",
                        "--database",
                        help="Database output file name",
                        required=True)
    parser.add_argument("-r",
                        "--reference",
                        help="Reference database ",
                        default="uniprotkb")
    parser.add_argument("-o",
                        "--output",
                        help="Output path",
                        default="matchmyseqs")

    args = parser.parse_args()

    seqDict = {}
    tier1seq = ''
    representative = ''
    fasta = {}
    seqsforCSV = {}
    progress = 0
    tier1 = {}
    tier1_annots = {
    }  # annotations that we want to include in the final dataset

    os.system('makeblastdb -dbtype prot -in ' + args.input + ' -out ' +
              args.database)

    db = sequence.readFastaFile(args.input,
                                sequence.Protein_Alphabet,
                                ignore=True,
                                parse_defline=False)
    db_map = {}  # map from "long" name to actual entry
    db_map_short = {}  # map from "short" name to entry
    for s in db:
        db_map[s.name] = s
        db_map_short[sequence.parseDefline(s.name)[0]] = s
    print("Database size is " + str(len(db_map)))

    print(
        "Blast started, this might take a bit depending on your dataset size")
    os.system("blastp -db " + args.database +
              " -outfmt 3 -num_descriptions 1 -num_alignments 0 -query " +
              args.query + " -out query.txt")

    if args.reference == 'uniprotkb':
        os.system(
            "grep -e \"^[st][pr]|\" query.txt | cut -d\' \' -f1 > UniProt_query.tab"
        )

        # Extract the resulting sequence identifiers
        repSeqNames = set([])
        f = open('UniProt_query.tab', 'rt')
        for row in f:
            repSeqNames.add(sequence.parseDefline(row.strip())[0])
        f.close()
        print(str(len(repSeqNames)),
              " representative sequences have been found")

        #Annot the representative sequences
        notfound = []
        for name in repSeqNames:
            if name in db_map_short:
                s = db_map_short[name]
                seqsforCSV[s.name] = "".join(s)
            else:
                notfound.append(name)
        print('Matched',
              len(repSeqNames) - len(notfound), 'of', len(repSeqNames))

        with open("query.txt", newline='') as f:
            reader = csv.reader(f)
            for row in reader:
                if len(row) > 0 and row[0].startswith('Query'):
                    querySeq = (str(row).split("=")[1][:-2].strip())
                elif len(row) > 0 and (row[0].startswith('tr|')
                                       or row[0].startswith('sp|')):
                    representative = (str(row).split(" ")[0][2:].strip())
                    seqDict[querySeq] = representative

    elif args.reference == 'refseq':
        grab = False
        repSeqNames = set([])

        with open("query.txt", newline='') as f:
            reader = csv.reader(f)
            for row in reader:
                if len(row) > 0 and row[0].startswith('Query'):
                    querySeq = (str(
                        row[0]).split("=")[1][:-2].strip().split(" ")[0])
                elif len(row) > 0 and row[0].startswith('Sequences'):
                    grab = True
                    continue
                elif grab == True:
                    if len(row) > 0 and not row[0].strip() == "":
                        representative = (row[0].split('.')[0] + "." +
                                          row[0].split('.')[1].split(" ")[0])
                        repSeqNames.add(representative)
                        seqDict[querySeq] = representative
                        grab = False
            #print(len(repSeqNames))

            notfound = []

            for name in repSeqNames:
                if name in db_map_short:
                    s = db_map_short[name]
                    seqsforCSV[s.name] = "".join(s)
                else:
                    notfound.append(name)
            print('Matched',
                  len(repSeqNames) - len(notfound), 'of', len(repSeqNames))

            print(len(repSeqNames),
                  " representative sequences found for " + args.query)

    # done25 = False
    # done50 = False
    # done75 = False
    # for s,rep in seqDict.items():
    # 	total = (len(seqDict))
    # 	seq = (sequence.getSequence(rep,'uniprotkb'))
    # 	seqsforCSV[rep] = str(seq).split(":")[1].strip()
    # 	elem = rep + str(seq)
    # 	progress+=1
    # 	if (progress/total)*100 > 25 and not done25:
    # 		print("25% done")
    # 		done25 = True
    # 	elif (progress/total)*100 > 50 and not done50:
    # 		print("50% done")
    # 		done50 = True
    # 	elif (progress/total)*100 > 75 and not done75:
    # 		print("75% done")
    # 		done75 = True

    faOut = args.output + '.fa'

    seq_list = [
        sequence.Sequence(sequence=seq, name=seqname)
        for seqname, seq in seqsforCSV.items()
    ]

    sequence.writeFastaFile(faOut, seq_list)

    csvOut = args.output + '.csv'

    with open(csvOut, 'w', newline='') as f:
        fieldnames = ['Name', 'Representative', 'Sequence']
        thewriter = csv.DictWriter(f, fieldnames=fieldnames)

        thewriter.writeheader()
        for given, rep in seqDict.items():
            thewriter.writerow({
                'Name': given,
                'Representative': rep,
                'Sequence': seqsforCSV[rep]
            })
Example No. 25
def _render_reverse_clip_dialog_callback(dialog, response_id, fb_widgets, media_file):
    if response_id == Gtk.ResponseType.ACCEPT:
        # speed, filename folder
        speed = float(int(fb_widgets.hslider.get_value())) / 100.0
        file_name = fb_widgets.file_name.get_text()
        filenames = fb_widgets.out_folder.get_filenames()
        folder = filenames[0]
        write_file = folder + "/"+ file_name + fb_widgets.extension_label.get_text()

        if os.path.exists(write_file):
            primary_txt = _("A File with given path exists!")
            secondary_txt = _("It is not allowed to render Motion Files with same paths as existing files.\nSelect another name for file.") 
            dialogutils.warning_message(primary_txt, secondary_txt, dialog)
            return

         # Profile
        profile_index = fb_widgets.out_profile_combo.get_active()
        if profile_index == 0:
            # project_profile is first selection in combo box
            profile = PROJECT().profile
        else:
            profile = mltprofiles.get_profile_for_index(profile_index - 1)

        # Render consumer properties
        encoding_option_index = fb_widgets.encodings_cb.get_active()
        quality_option_index = fb_widgets.quality_cb.get_active()

        # Range
        range_selection = fb_widgets.render_range.get_active()
        
        dialog.destroy()

        # Create motion producer
        source_path = media_file.path
        if media_file.is_proxy_file == True:
            source_path = media_file.second_file_path

        motion_producer = mlt.Producer(profile, None, str("timewarp:" + str(speed) + ":" + str(source_path)))
        mltrefhold.hold_ref(motion_producer)
        
        # Create sequence and add motion producer into it
        seq = sequence.Sequence(profile)
        seq.create_default_tracks()
        track = seq.tracks[seq.first_video_index]
        track.append(motion_producer, 0, motion_producer.get_length() - 1)

        print "motion clip render starting..."

        consumer = renderconsumer.get_render_consumer_for_encoding_and_quality(write_file, profile, encoding_option_index, quality_option_index)
        
        # start and end frames
        start_frame = 0
        end_frame = motion_producer.get_length() - 1
        wait_for_producer_stop = True
        if range_selection == 1:
            start_frame = int(float(media_file.length - media_file.mark_out - 1) * (1.0 / -speed))
            end_frame = int(float(media_file.length - media_file.mark_out + (media_file.mark_out - media_file.mark_in) + 1) * (1.0 / -speed)) + int(1.0 / -speed)

            if end_frame > motion_producer.get_length() - 1:
                end_frame = motion_producer.get_length() - 1
            if start_frame < 0:
                start_frame = 0
            
            wait_for_producer_stop = False # consumer won't stop automatically and needs to be stopped explicitly

        # Launch render
        global motion_renderer, motion_progress_update
        motion_renderer = renderconsumer.FileRenderPlayer(write_file, seq.tractor, consumer, start_frame, end_frame)
        motion_renderer.wait_for_producer_end_stop = wait_for_producer_stop
        motion_renderer.start()

        title = _("Rendering Reverse Clip")
        text = "<b>Motion Clip File: </b>" + write_file
        progress_bar = Gtk.ProgressBar()
        dialog = rendergui.clip_render_progress_dialog(_FB_render_stop, title, text, progress_bar, gui.editor_window.window)

        motion_progress_update = renderconsumer.ProgressWindowThread(dialog, progress_bar, motion_renderer, _REVERSE_render_stop)
        motion_progress_update.start()

    else:
        dialog.destroy()
Example No. 26
        default=True,
        help=
        "Specify whether reverse complementary patterns should be taken into account as well.",
    )

    return parser


if __name__ == "__main__":
    start_time = time.time()

    print("\nParsing input file...")
    parser = construct_argparser()
    args = parser.parse_args()
    contents = seq.parse_fasta_file(args.input_path)
    sequence = seq.Sequence(contents)
    print("\nSuccesfully parsed {} into a sequence of length {}.\n".format(
        args.input_path, sequence.length))

    print("\nDetermining the G-C skew minima...")
    skew = sequence.skew_graph()
    skew_minima = [str(minimum) for minimum in skew["Skew minima"]]
    skew_minima = " ".join(skew_minima)
    print("\nLocations of skew minima:\n\n{}\n".format(skew_minima))

    print("\nFinding clumps of patterns around the skew minima...")
    width = int(args.window_length / 2)
    dnaa_boxes = []
    for minimum in skew["Skew minima"]:
        min_skew = int(minimum)
        start_pos = (min_skew - width) if (min_skew >= width) else 0
Example No. 27
def sequence(*steps):
    return s.Sequence(steps)
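A one-line usage sketch (hypothetical: step1 and step2 stand for whatever step objects s.Sequence accepts):

combined = sequence(step1, step2)  # wraps the steps in an s.Sequence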
Example No. 28
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--input",
                        help="Input FASTA file",
                        required=True)
    parser.add_argument("-db",
                        "--database",
                        help="Database output file name",
                        required=True)
    parser.add_argument("-r",
                        "--redundancy",
                        nargs='*',
                        help="List of redundancy levels",
                        default=[90, 80, 70])
    parser.add_argument("-t1", "--tier1", help="User's Tier1 sequences")
    parser.add_argument("-t2", "--tier2", help="User's Tier2 sequences")
    parser.add_argument("-ml",
                        "--maxlength",
                        help="Max length that the sequence can be",
                        default=800)
    parser.add_argument("-e",
                        "--eval",
                        nargs='*',
                        help="List of evalues",
                        default=[1e-100, 1e-75, 1e-50, 1e-20, 1e-10, 1e-5])
    args = parser.parse_args()

    tier2 = {}
    tier2_short = {}
    tier2_annots = {
    }  # annotations that we want to include in the final dataset

    if args.tier2:
        print("tier2 sequences have been provided")

        if '.fa' in args.tier2 or '.fasta' in args.tier2:
            print("tier2 sequences are FASTA file")
            tier2db = sequence.readFastaFile(args.tier2,
                                             sequence.Protein_Alphabet,
                                             ignore=True,
                                             parse_defline=False)
            tier2_list = {}  # map from "long" name to actual entry
            tier2_map_short = {}  # map from "short" name to entry
            for s in tier2db:
                tier2_list[s.name] = s
                tier2_map_short[sequence.parseDefline(s.name)[0]] = s
            print(str(len(tier2_list)) + " sequences in tier2")
        else:
            print("Please provide FASTA file for tier-2")

    if args.tier1:
        tier1 = {}
        tier1_annots = {
        }  # annotations that we want to include in the final dataset
        print("Tier-1 sequences have been provided")
        if '.fa' in args.tier1 or '.fasta' in args.tier1:

            print("Tier-1 sequences are provided as a FASTA file")
            tier1db = sequence.readFastaFile(args.tier1,
                                             sequence.Protein_Alphabet,
                                             ignore=True,
                                             parse_defline=False)
            tier1_list = {}
            for s in tier1db:
                tier1_list[s.name] = "".join(s.sequence)
            print("Tier-1 has " + str(len(tier1_list)) + " sequences")

        else:
            print("Please provide FASTA file for tier-1")

    db100 = sequence.readFastaFile(args.input,
                                   sequence.Protein_Alphabet,
                                   ignore=True,
                                   parse_defline=False)
    db100_map = {}  # map from "long" name to actual entry
    db100_map_short = {}  # map from "short" name to entry
    for s in db100:
        db100_map[s.name] = s
        db100_map_short[sequence.parseDefline(s.name)[0]] = s
    print("Database has " + str(len(db100_map)) + " sequences")

    for rr in args.redundancy:
        rs = str(rr)
        os.system('cd-hit -i ' + args.input + ' -c 0.' + rs + ' -T 5 -o db' +
                  rs + ' -d 0')

    selected = {}
    for rr in args.redundancy:
        selected[rr] = []
        filename = 'db' + str(rr) + '.clstr'
        clusters = readCDHIT(filename)
        for c in clusters:
            picked_one = False
            shortest = None
            reviewed = None
            for name in clusters[c]:
                if name in db100_map:
                    seq = db100_map[name]
                    if shortest:
                        if len(seq) < len(shortest) and not disqualified(
                                seq, args):
                            shortest = seq
                    elif not disqualified(seq, args):
                        shortest = seq
                    if seq.name.startswith('sp|') and not disqualified(
                            seq, args):
                        reviewed = seq
                    if name in tier1_list:
                        #print("this one orig" + str(seq))
                        selected[rr].append(seq)
                        picked_one = True
                else:
                    pass
                    #print('Did not find', name)
            # If no Tier-1, prefer "reviewed", then shortest length
            if not picked_one and reviewed:
                selected[rr].append(reviewed)
            elif not picked_one and shortest:
                selected[rr].append(shortest)

    for rr in args.redundancy:
        filename = 'db' + str(rr) + '.fa'
        sequence.writeFastaFile(filename, selected[rr])

    for rr in args.redundancy:
        os.system('makeblastdb -dbtype prot -in db' + str(rr) +
                  '.fa -out db-' + str(rr))

    # for rr in args.redundancy:
    #     for evalue in args.evalue:
    #         result_file = "dataset-" + str(rr) + '-'+ str(evalue)
    #         cmd1 = "blastp -db db-" + str(rr) + " -outfmt 3 -num_descriptions 20000 -num_alignments 0 -num_threads 5 -query " + args.tier1 + " -out " + result_file + ".txt -evalue " + str(evalue)
    #         print(cmd1)
    #         os.system(cmd1)

    grab = False

    for rr in args.redundancy:
        for evalue in args.eval:
            c = 0
            tpsIdentifier = set([])
            seqs = []
            result_file = "dataset-" + str(rr) + '-' + str(evalue)
            f = open(result_file + '.txt', 'rt')
            for row in f:
                if row.startswith('Sequences'):
                    grab = True
                    continue
                if grab == True:
                    if row.startswith('Lambda'):
                        grab = False
                    if not row.strip() == "":
                        identifier = row.split(' ')[0]
                        if identifier != "Lambda":
                            tpsIdentifier.add(identifier)

            for name in tpsIdentifier:
                try:
                    seq = db100_map[name]
                    info = ''
                    seqs.append(
                        sequence.Sequence(seq.sequence, seq.alphabet, seq.name,
                                          info))
                except:
                    pass
            sequence.writeFastaFile(result_file + ".fa", seqs)
            print(result_file + " has " + str(len(seqs)) + "sequences")

    print('Done')

    totalSeqCount = []
    c = 0
    for evalue in args.eval:
        for rr in args.redundancy:
            output = []
            ev = str(evalue)
            ev = ev[1:]
            red = str(rr)
            result_file = "dataset-" + str(rr) + '-' + str(evalue)
            a = sequence.readFastaFile(result_file + '.fa',
                                       sequence.Protein_Alphabet,
                                       ignore=True,
                                       parse_defline=False)

            names = set([])
            for s in a:
                names.add(s.name)
            tier1_cnt = 0
            tier2_cnt = 0
            seqs = []
            for name in names:
                try:
                    seq = db100_map[name]
                    info = ''
                    if name in tier1_list:
                        tier1_cnt += 1
                        #info = seq.info + ' ' + tier1_annots[name]
                    elif name in tier2:
                        tier2_cnt += 1
                        #info = seq.info + ' ' + tier2_annots[name]
                    seqs.append(
                        sequence.Sequence(seq.sequence, seq.alphabet, seq.name,
                                          info))
                except:
                    pass
                #print('Did not find', name)
            print('Processed', len(seqs), 'for', result_file, ' Tier-1:',
                  tier1_cnt, ' Tier-2:', tier2_cnt)
            output = [ev, red, len(seqs)]
            totalSeqCount.append(output)

    plotSeqs(totalSeqCount)
Example No. 29
def setup_AWG_pulsed_spec_sequence(sequence_name='Cool_Sequence',
                                   measurement_trigger_delay=2e-6,
                                   SSB_modulation_frequency=-50e6,
                                   measurement_pulse_length=10e-6,
                                   cooling_pulse_length=200e-6,
                                   cooling_measurement_delay=5e-6,
                                   buffer_pulse_length=2.e-6,
                                   readout_trigger_length=1.0e-6,
                                   measurement_pulse_amp=0.5,
                                   doplot=True,
                                   devAWG=Tektronix_AWG520(name='AWG'),
                                   us_clock=True,
                                   trigger_first=False):
    '''
    Build the AWG single-element sequences for the cooling experiment.
    Each element contains a cooling pulse, a readout trigger and a readout pulse.
    The readout trigger is the fixpoint, as it defines the timing we see on
    the signal analyzer.
    The readout pulse is defined via the IQ modulation of a vector source.
    The cooling pulse is a marker driving a microwave switch.

    There is some funky behaviour if there are no buffer pulses around the
    sequence, so buffer pulses are added at the beginning and end
    such that the channels are zero there.
    '''

    if us_clock is True:
        measurement_trigger_delay = measurement_trigger_delay * 1e-3
        SSB_modulation_frequency = SSB_modulation_frequency * 1e-3
        measurement_pulse_length = measurement_pulse_length * 1e-3
        cooling_measurement_delay = cooling_measurement_delay * 1e-3
        cooling_pulse_length = cooling_pulse_length * 1e-3
        buffer_pulse_length = buffer_pulse_length * 1e-3
        readout_trigger_length = 1 * readout_trigger_length * 1e-3

    if trigger_first is True:
        left_reference_pulse_name = 'readout trigger'
    else:
        left_reference_pulse_name = 'pulsed spec'

    AWG = AWG_station.AWG_Station()
    AWG.AWG = devAWG

    clock = devAWG.get_clock()

    devAWG.set_run_mode('ENH')
    devAWG.set_refclock_ext()

    AWG.define_channels(id='ch1',
                        name='RF1',
                        type='analog',
                        high=0.541,
                        low=-0.541,
                        offset=0.,
                        delay=0,
                        active=True)

    AWG.define_channels(id='ch2',
                        name='RF2',
                        type='analog',
                        high=0.541,
                        low=-0.541,
                        offset=0.,
                        delay=0,
                        active=True)

    AWG.define_channels(id='ch2_marker1',
                        name='MW_pulsemod',
                        type='marker',
                        high=1.0,
                        low=0,
                        offset=0.,
                        delay=0,
                        active=True)

    AWG.define_channels(id='ch1_marker1',
                        name='readout_trigger',
                        type='marker',
                        high=1,
                        low=0,
                        offset=0.,
                        delay=0,
                        active=True)

    sin_pulse = pulse.CosPulse(channel='RF1', name='A sine pulse on RF')
    sin_pulse_2 = pulse.CosPulse(channel='RF2', name='A sine pulse on RF')

    SSB_pulse = pulse.MW_IQmod_pulse(I_channel='RF1',
                                     Q_channel='RF2',
                                     name='SSB pulse')

    pulsed_spec_pulse = pulse.SquarePulse(channel='MW_pulsemod',
                                          name='A square pulse on MW pmod')

    readout_trigger_pulse = pulse.SquarePulse(channel='readout_trigger',
                                              name='A square pulse on MW pmod')

    sq_pulse_ch1 = pulse.SquarePulse(channel='RF1',
                                     name='A square pulse on MW pmod')

    sq_pulse_ch2 = pulse.SquarePulse(channel='RF2',
                                     name='A square pulse on MW pmod')

    test_element1 = element.Element(
        (sequence_name + '_element1'),
        pulsar=AWG)  #, ignore_offset_correction=True)
    test_element2 = element.Element(
        (sequence_name + '_element2'),
        pulsar=AWG)  #, ignore_offset_correction=True)

    test_element1.add(pulse.cp(readout_trigger_pulse,
                               amplitude=1.,
                               length=readout_trigger_length),
                      start=0.1e-6,
                      name='readout trigger',
                      refpoint='start')

    test_element1.add(pulse.cp(SSB_pulse,
                               mod_frequency=SSB_modulation_frequency,
                               amplitude=measurement_pulse_amp,
                               length=measurement_pulse_length),
                      start=measurement_trigger_delay,
                      name='readout pulse',
                      refpulse='readout trigger',
                      refpoint='start')

    test_element1.add(pulse.cp(pulsed_spec_pulse,
                               amplitude=1.,
                               length=cooling_pulse_length),
                      start=-1 * cooling_measurement_delay -
                      cooling_pulse_length,
                      name='pulsed spec',
                      refpulse='readout pulse',
                      refpoint='start')

    test_element1.add(pulse.cp(readout_trigger_pulse,
                               amplitude=0.,
                               length=buffer_pulse_length),
                      start=-1 * buffer_pulse_length,
                      name='buffer left',
                      refpulse=left_reference_pulse_name,
                      refpoint='start')

    test_element1.add(pulse.cp(readout_trigger_pulse,
                               amplitude=0.,
                               length=buffer_pulse_length),
                      start=0,
                      name='buffer right',
                      refpulse='readout pulse',
                      refpoint='end')

    test_element2.add(pulse.cp(readout_trigger_pulse,
                               amplitude=1.,
                               length=readout_trigger_length),
                      start=0.1e-6,
                      name='readout trigger',
                      refpoint='start')

    test_element2.add(pulse.cp(SSB_pulse,
                               mod_frequency=SSB_modulation_frequency,
                               amplitude=measurement_pulse_amp,
                               length=measurement_pulse_length),
                      start=measurement_trigger_delay,
                      name='readout pulse',
                      refpulse='readout trigger',
                      refpoint='start')

    test_element2.add(pulse.cp(pulsed_spec_pulse,
                               amplitude=1.,
                               length=cooling_pulse_length),
                      start=-1 * cooling_measurement_delay -
                      cooling_pulse_length,
                      name='pulsed spec',
                      refpulse='readout pulse',
                      refpoint='start')

    test_element2.add(pulse.cp(readout_trigger_pulse,
                               amplitude=0.,
                               length=buffer_pulse_length),
                      start=-1 * buffer_pulse_length,
                      name='buffer left',
                      refpulse=left_reference_pulse_name,
                      refpoint='start')

    test_element2.add(pulse.cp(readout_trigger_pulse,
                               amplitude=0.,
                               length=buffer_pulse_length),
                      start=0,
                      name='buffer right',
                      refpulse='readout pulse',
                      refpoint='end')

    #print('Channel definitions: ')

    #test_element1.print_overview()

    # test_element2.print_overview()

    # -------------------------continue-------------------------------

    # -------------------------------------------------------
    # viewing of the sequence for second check of timing etc
    if doplot is True:
        viewer.show_element_stlab(test_element1,
                                  delay=False,
                                  channels='all',
                                  ax=None)
        viewer.show_element_stlab(test_element2,
                                  delay=False,
                                  channels='all',
                                  ax=None)

    # --------------------------------------------------------

    devAWG.init_dir()
    devAWG.clear_waveforms()
    seq = sequence.Sequence(sequence_name)
    seq.append(name='first_element',
               wfname=(sequence_name + '_element1'),
               trigger_wait=True,
               goto_target='second element')

    seq.append(name='second_element',
               wfname=(sequence_name + '_element2'),
               trigger_wait=True,
               goto_target='first_element')

    AWG.program_awg(seq, test_element1, test_element2,
                    verbose=True)  #, test_element2)
Example No. 30
 def __init__(self, tx):
     self.txid = txid.Txid(tx)
     self.vout = vout.Vout(tx)
     self.script_sig_size = script_sig_size.ScriptSigSize(tx)
     self.script_sig = script_sig.ScriptSig(tx, self)
     self.sequence = sequence.Sequence(tx)