コード例 #1
0
ファイル: fc_asm_graph.py プロジェクト: lpp1985/lpp_Script
    def load_sg_seq(self, fasta_fn):
        """Fill ``self.sg_edge_seqs`` with a sequence for every "G"-typed edge.

        Reads ``fasta_fn`` with ``FastaReader`` and keeps, upper-cased, only the
        records whose name matches a node of a "G" edge. Each "G" edge then gets
        the slice of the read named in its edge data.

        fasta_fn -- value handed to ``FastaReader`` (presumably a FASTA path).
        """
        # Read names are the part of each node name before the first ":".
        wanted_reads = set()
        for src, dst in self.sg_edges:
            if self.sg_edges[(src, dst)][-1] == "G":
                wanted_reads.update((src.split(":")[0], dst.split(":")[0]))

        # Hold only the reads referenced by the graph in memory.
        read_seqs = {
            rec.name: rec.sequence.upper()
            for rec in FastaReader(fasta_fn)
            if rec.name in wanted_reads
        }

        for src, dst in self.sg_edges:
            edge_data = self.sg_edges[(src, dst)]
            # The first field is unpacked for every edge, before the type filter.
            seq_id, s, t = edge_data[0]
            if edge_data[-1] != "G":
                continue

            read_seq = read_seqs[seq_id]
            if s < t:
                e_seq = read_seq[s:t]
            else:
                # s >= t: walk [t:s] backwards, mapping each base through RCMAP
                # (presumably yielding the reverse complement).
                e_seq = "".join(RCMAP[base] for base in reversed(read_seq[t:s]))
            self.sg_edge_seqs[(src, dst)] = e_seq
コード例 #2
0
    def inputFile(self, fname):
        """Load a FASTA file and initialise this object from its records.

        Stores ``fname``, takes the first record's sequence as ``self.seqt0``
        and the last record's sequence as ``self.seqtf``, calls
        ``self.findMutations()``, fills ``self.possibleMutations`` from
        ``seqt0``, and parses the last record's header into ``self.uniqueID``
        and ``self.drugsGiven``.

        fname -- value handed to ``FastaReader`` (presumably a FASTA path).

        Raises IndexError if the reader yields no records.
        """
        self.fname=fname
        self.uniqueID= ''
        self.drugsGiven=[]
        reader = FastaReader(fname)

        mutationList= []
        for header, seq in reader.readFasta():
            mutationList.append((header,seq))

        self.seqt0= mutationList[0][1]
        self.seqtf= mutationList[-1][1]
        # NOTE(review): an earlier comment here mentioned shaving '>' from the
        # header, but nothing in this method strips it -- confirm whether
        # readFasta() already does.

        self.findMutations()
        self.possibleMutations= findAllPossibleMutations(self.seqt0)
        #Parse the header and put in relevant information
        finalHeader= mutationList[-1][0]
        readHeader= True
        firstUnderScore= True
        builtStr=''
        readDrugs=False
        # Character-by-character state machine over the last record's header
        # (iterates finalHeader explicitly rather than the loop variable that
        # leaked out of the record loop above; both hold the same string).
        for char in finalHeader:
            if readHeader:
                # Phase 1: everything up to, but not including, the second
                # underscore becomes uniqueID (the first underscore is kept).
                if char=='_':
                    if firstUnderScore:
                        builtStr+=char
                        firstUnderScore= False
                    else:
                        readHeader= False
                        self.uniqueID= builtStr
                else:
                    builtStr+=char

            elif readDrugs:
                # Phase 3: each '_' closes one drug name. A trailing name with
                # no closing '_' is never appended.
                if char== '_':
                    self.drugsGiven.append(builtStr)
                    builtStr=''
                elif builtStr == 'None':
                    # 'None' followed by a non-underscore character stops parsing.
                    break

                else:
                    builtStr+=char

            elif builtStr== '__':
                # Phase 2 -> 3: the buffer holds exactly two underscores, so
                # the current character is the first one of the drug list.
                readDrugs= True
                builtStr=''
                builtStr+=char

            elif char != '_':
                # Phase 2: any non-underscore character clears the buffer...
                builtStr= ''
            else:
                # ...while underscores accumulate in it.
                builtStr+=char
コード例 #3
0
ファイル: substringSearch.py プロジェクト: Wei0612/CodingTest
def main():
    """Run both substring exercises on the FASTA file named on the command line.

    Reads three values from ``sys.argv``:
        1. path to the FASTA file,
        2. the string to count (problem 1),
        3. the subsequence length (problem 2), parsed with ``int``.

    Prints one line per problem and writes every subsequence counted at least
    10 times to ``Output/substringCountLarger10.csv`` under the current
    working directory, creating ``Output`` if it does not exist.
    """
    fastaPath = os.path.abspath(sys.argv[1])  # absolute path of the input file
    fastaReader = FastaReader()
    fastaReader.readFastaFile(fastaPath)     # read file

    # PROBLEM 1. (Detail Algorithm implemented in Sequence.py (subStringSearch() function))
    searchString = sys.argv[2]
    numOfSubstring = fastaReader.numberOfSubstring(searchString)
    print(f"Problem 1: String, {searchString}, appears {numOfSubstring} times in file {fastaReader.getFileName()}")

    # PROBLEM 2. (Detail Algorithm implemented in FastaReader.py ())
    lengthOfString = int(sys.argv[3])
    highestFreqString, appearTimes, subseqCounter = fastaReader.findHighestOccurrence(lengthOfString)
    print(f"Problem 2: {lengthOfString}-mer subsequence, {highestFreqString}, has highest occurrences, {appearTimes} times, in file {fastaReader.getFileName()}")

    # Make sure the output directory exists; otherwise open() below would fail
    # with FileNotFoundError only after all the counting work has been done.
    outputDir = os.path.join(os.path.abspath("./"), "Output")
    os.makedirs(outputDir, exist_ok=True)

    # Write the subsequences that occur at least 10 times as CSV.
    with open(os.path.join(outputDir, "substringCountLarger10.csv"), 'w') as outputFile:
        # column names
        outputFile.write("Subsequence,Counts\n")

        for subseq, counter in subseqCounter.items():
            if counter >= 10:
                outputFile.write(f"{subseq},{counter}\n")
コード例 #4
0
                        with_statement)
from builtins import (bytes, dict, int, list, object, range, str, ascii, chr,
                      hex, input, next, oct, open, pow, round, super, filter,
                      map, zip)
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
import ProgramName
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from Rex import Rex

rex = Rex()

#=========================================================================
# main()
#=========================================================================
# Usage: <program> <in.fasta> <out.fasta>
# Copies to <out.fasta> only the records for which rex.find(">chr", defline)
# is truthy.
if (len(sys.argv) != 3):
    sys.exit(ProgramName.get() + " <in.fasta> <out.fasta>\n")
(infile, outfile) = sys.argv[1:]

# The with-block closes the output file even if reading or writing raises.
with open(outfile, "wt") as OUT:
    writer = FastaWriter()
    reader = FastaReader(infile)
    while (True):
        (defline, seq) = reader.nextSequence()
        # A falsy defline ends the loop (presumably end of input).
        if (not defline): break
        if (not rex.find(">chr", defline)): continue
        writer.addToFasta(defline, seq, OUT)
コード例 #5
0
            index_opt=None,
            index_outdir = "./bwa_index/")
    """
    Main function of RefMasker that integrate database creation, blast and homology masking
    * Instantiate Blast database and blastn object
    * Perform iterative blasts of query sequences against the subject database and create a list of
    hits.
    """

    # Try to validate a index from an existing one
    try:
        if not index_path:
            raise Exception("No index provided. An index will be generated")

        print("Existing index provided")
        FastaRef = FastaReader(ref1_path, ref2_path, write_merge=False)
        Index = ExistingIndex(bwa_path, index_path)

    # If no index or if an error occured during validation of the existing index = create a new one
    except Exception as E:
        print (E)

        print("Merge References...")
        mkdir(ref_outdir)

        FastaRef = FastaReader([ref1_path,ref2_path], write_merge=True, output="merged.fa")

        print("Generating index...")
        mkdir(db_outdir)
        Index = GenerateIndex(bwa_path, FastaRef.merge_ref, index_opt)
        remove (FastaRef.merge_ref)
コード例 #6
0
from __future__ import print_function
from FastaReader import FastaReader

# Print every record of tmp.fa under a ">ccs/<running index>/0_<length>"
# header, with the sequence wrapped at 60 characters per line.
f = FastaReader("tmp.fa")
count = 0
for r in f:
    rlen = len(r.sequence)
    print(">ccs/{}/{}_{}".format(count, 0, rlen))
    # range instead of the Python-2-only xrange, so the script also runs on
    # Python 3 (it already opts into print_function).
    for s in range(0, rlen, 60):
        print(r.sequence[s:s + 60])
    count += 1