def load_sg_seq(self, fasta_fn):
    """Fill ``self.sg_edge_seqs`` with the sequence of every "G" edge.

    Each key of ``self.sg_edges`` is a ``(v, w)`` node pair.  The value's
    first element is a ``(seq_id, s, t)`` triple and its last element is
    the edge type.  Only edges whose type is ``"G"`` get a sequence.

    Args:
        fasta_fn: Passed to ``FastaReader``; the records it yields must
            expose ``name`` and ``sequence``.

    Raises:
        KeyError: If a "G" edge refers to a ``seq_id`` that was not loaded.
    """
    # Pass 1: collect the read ids (the part of a node name before ":")
    # touched by "G" edges, so only those reads are kept in memory.
    wanted_ids = set()
    for v, w in self.sg_edges:
        if self.sg_edges[(v, w)][-1] != "G":
            continue
        wanted_ids.update((v.split(":")[0], w.split(":")[0]))

    # Pass 2: load the upper-cased sequence of each wanted read.
    read_seqs = {
        rec.name: rec.sequence.upper()
        for rec in FastaReader(fasta_fn)
        if rec.name in wanted_ids
    }

    # Pass 3: cut the edge sequence out of its read.
    for v, w in self.sg_edges:
        edge_data = self.sg_edges[(v, w)]
        seq_id, s, t = edge_data[0]
        if edge_data[-1] != "G":
            continue
        if s < t:
            edge_seq = read_seqs[seq_id][s:t]
        else:
            # s >= t: take read[t:s] back to front and map every base
            # through RCMAP.
            backwards = reversed(read_seqs[seq_id][t:s])
            edge_seq = "".join(RCMAP[base] for base in backwards)
        self.sg_edge_seqs[(v, w)] = edge_seq
def inputFile(self, fname):
    """Load a FASTA file and derive this object's sequence and header state.

    Sets ``fname``, ``uniqueID``, ``drugsGiven``, ``seqt0`` (sequence of the
    first record), ``seqtf`` (sequence of the last record) and
    ``possibleMutations``, and calls ``self.findMutations()`` once both
    sequences are in place.  The header of the last record is then scanned
    character by character to fill ``uniqueID`` and ``drugsGiven``.

    Args:
        fname: Path handed to ``FastaReader``.

    Raises:
        IndexError: If the reader yields no records.
    """
    self.fname = fname
    self.uniqueID = ''
    self.drugsGiven = []

    records = [(hdr, seq) for hdr, seq in FastaReader(fname).readFasta()]
    self.seqt0 = records[0][1]
    self.seqtf = records[-1][1]
    self.findMutations()
    self.possibleMutations = findAllPossibleMutations(self.seqt0)

    # Header scan.  NOTE(review): the layout looks like
    # "<id>_<id>_...__<drug>_<drug>_" -- confirm against real headers.
    finalHeader = records[-1][0]
    in_id = True              # still collecting the unique id
    underscore_seen = False   # the id keeps its first underscore
    in_drugs = False          # collecting "_"-terminated drug names
    token = ''
    for ch in finalHeader:
        if in_id:
            if ch != '_':
                token += ch
            elif not underscore_seen:
                token += ch
                underscore_seen = True
            else:
                # The second underscore closes the id.  The token is
                # deliberately not cleared here.
                in_id = False
                self.uniqueID = token
            continue

        if in_drugs:
            if ch == '_':
                self.drugsGiven.append(token)
                token = ''
            elif token == 'None':
                # A name of exactly "None" followed by another character
                # ends the scan.
                break
            else:
                token += ch
            continue

        # Between the id and the drug list: wait for a run of exactly "__".
        if token == '__':
            in_drugs = True
            token = ch
        elif ch != '_':
            token = ''
        else:
            token += ch
def main():
    """Run both FASTA exercises from the command line.

    Expected arguments:
        argv[1]: path of the FASTA file to read.
        argv[2]: string whose occurrences are counted (problem 1).
        argv[3]: subsequence length for the frequency search (problem 2).

    Prints one result line per problem and writes every subsequence that
    occurs at least 10 times to ``./Output/substringCountLarger10.csv``.

    Raises:
        SystemExit: If fewer than three arguments are given.
        ValueError: If argv[3] is not an integer.
    """
    if len(sys.argv) < 4:
        sys.exit(f"Usage: {sys.argv[0]} <fasta_file> <search_string> <subsequence_length>")

    fastaDir = os.path.abspath(sys.argv[1])  # absolute path of the input file
    fastaReader = FastaReader()
    fastaReader.readFastaFile(fastaDir)  # read file

    # PROBLEM 1. (Detail Algorithm implemented in Sequence.py (subStringSearch() function))
    searchString = sys.argv[2]
    numOfSubstring = fastaReader.numberOfSubstring(searchString)
    print(f"Problem 1: String, {searchString}, appears {numOfSubstring} times in file {fastaReader.getFileName()}")

    # PROBLEM 2. (Detail Algorithm implemented in FastaReader.py ())
    lengthOfString = int(sys.argv[3])
    highestFreqString, appearTimes, subseqCounter = fastaReader.findHighestOccurrence(lengthOfString)
    print(f"Problem 2: {lengthOfString}-mer subsequence, {highestFreqString}, has highest occurrences, {appearTimes} times, in file {fastaReader.getFileName()}")

    # Write the subsequences seen at least 10 times to
    # Output/substringCountLarger10.csv.  The directory is created first,
    # because open() does not create missing parent directories.
    outputDir = os.path.join(os.path.abspath("./"), "Output")
    os.makedirs(outputDir, exist_ok=True)
    with open(os.path.join(outputDir, "substringCountLarger10.csv"), 'w') as outputFile:
        # column names
        outputFile.write("Subsequence,Counts\n")
        for subseq, counter in subseqCounter.items():
            if counter >= 10:
                outputFile.write(f"{subseq},{counter}\n")
with_statement) from builtins import (bytes, dict, int, list, object, range, str, ascii, chr, hex, input, next, oct, open, pow, round, super, filter, map, zip) # The above imports should allow this program to run in both Python 2 and # Python 3. You might need to update your version of module "future". import sys import ProgramName from FastaReader import FastaReader from FastaWriter import FastaWriter from Rex import Rex rex = Rex() #========================================================================= # main() #========================================================================= if (len(sys.argv) != 3): exit(ProgramName.get() + " <in.fasta> <out.fasta>\n") (infile, outfile) = sys.argv[1:] OUT = open(outfile, "wt") writer = FastaWriter() reader = FastaReader(infile) while (True): (defline, seq) = reader.nextSequence() if (not defline): break if (not rex.find(">chr", defline)): continue writer.addToFasta(defline, seq, OUT) OUT.close()
index_opt=None, index_outdir = "./bwa_index/") """ Main function of RefMasker that integrate database creation, blast and homology masking * Instantiate Blast database and blastn object * Perform iterative blasts of query sequences against the subject database and create a list of hits. """ # Try to validate a index from an existing one try: if not index_path: raise Exception("No index provided. An index will be generated") print("Existing index provided") FastaRef = FastaReader(ref1_path, ref2_path, write_merge=False) Index = ExistingIndex(bwa_path, index_path) # If no index or if an error occured during validation of the existing index = create a new one except Exception as E: print (E) print("Merge References...") mkdir(ref_outdir) FastaRef = FastaReader([ref1_path,ref2_path], write_merge=True, output="merged.fa") print("Generating index...") mkdir(db_outdir) Index = GenerateIndex(bwa_path, FastaRef.merge_ref, index_opt) remove (FastaRef.merge_ref)
from __future__ import print_function
from FastaReader import FastaReader

# Re-emit every record of tmp.fa on stdout with a ">ccs/<index>/0_<length>"
# header and the sequence wrapped at 60 characters per line.
LINE_WIDTH = 60

for count, r in enumerate(FastaReader("tmp.fa")):
    rlen = len(r.sequence)
    print(">ccs/{}/{}_{}".format(count, 0, rlen))
    # range() rather than xrange(): xrange does not exist on Python 3,
    # while range() works on both interpreters.
    for s in range(0, rlen, LINE_WIDTH):
        print(r.sequence[s:s + LINE_WIDTH])