def createMikadoGTFlist(self, out_file, out_dir, searchPath, searchQuery="*.gtf", strand=False):
    """Create a file to be used by mikado configure.

    Files matching ``searchQuery`` under ``searchPath`` are located with
    ``pe.find_files`` and written one per line as three tab-separated
    fields: file path, file basename and ``str(strand)``.

    out_file: str
        Output file name; ".txt" is appended to it.
    out_dir: str
        Directory for the list file; created if it does not exist.
    searchPath: str
        Path where gtf/gff files will be searched.
    searchQuery: str
        Query to perform the search. Default: "*.gtf"
    strand: bool
        Stranded flag written as the third column. Default: False

    Returns the path of the written list file.
    """
    files = pe.find_files(searchPath, searchQuery)

    # create out dir
    if not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)
    outFilePath = os.path.join(out_dir, out_file + ".txt")

    gtfs = []
    for gtf_path in files:
        thisName = pu.get_file_basename(gtf_path)
        # entries for which no basename is returned are left out of the list
        if thisName:
            gtfs.append("\t".join([gtf_path, thisName, str(strand)]))

    # the context manager closes the handle even if write() raises
    with open(outFilePath, "w") as f:
        f.write("\n".join(gtfs))

    pu.print_green("Mikado list file written to:" + outFilePath)
    return outFilePath
def search_fastq(self, path):
    """Look for .fastq files under a directory and record them on this object.

    Exactly one file sets ``localfastqPath`` and layout "SINGLE"; exactly
    two files set ``localfastq1Path``/``localfastq2Path`` (in sorted order)
    and layout "PAIRED".

    Returns True if one or two files were found, otherwise False.
    """
    found = pe.find_files(path, "*.fastq")
    n_found = len(found)

    # nothing found under the path
    if n_found < 1:
        return False

    # more than a pair cannot be mapped to a layout
    if n_found > 2:
        pu.print_boldred("Can not determine .fastq. Exiting...")
        return False

    found.sort()

    if n_found == 1:
        # single fastq
        self.localfastqPath = found[0]
        pu.print_green("Found .fastq " + self.localfastqPath)
        self.layout = "SINGLE"
    else:
        # paired fastq: first and second file after sorting
        self.localfastq1Path, self.localfastq2Path = found[0], found[1]
        pu.print_green("Found .fastq " + self.localfastq1Path + " " + self.localfastq2Path)
        self.layout = "PAIRED"

    return True
def search_sra(self, path):
    """Look for a .sra file under a directory and record it on this object.

    On success sets ``srr_accession`` (file basename), ``localSRAFilePath``,
    ``sraFileSize`` and ``layout`` ("PAIRED" or "SINGLE", as reported by
    ``pe.is_paired``). When several .sra files are found a warning is
    printed and the first entry is used.

    Returns True if a file was found, otherwise False.
    """
    candidates = pe.find_files(path, "*.sra")
    n_candidates = len(candidates)

    if n_candidates < 1:
        return False

    if n_candidates > 1:
        pu.print_boldred(
            "Found multiple .sra files. Using the first entry...")

    chosen = candidates[0]
    self.srr_accession = pu.get_file_basename(chosen)
    self.localSRAFilePath = chosen
    self.sraFileSize = pu.get_file_size(self.localSRAFilePath)

    # test if file is paired or single end
    self.layout = "PAIRED" if pe.is_paired(self.localSRAFilePath) else "SINGLE"

    pu.print_green("Found .sra " + self.localSRAFilePath)
    return True
def createMikadoGTFlist(self, out_file, out_dir, searchPath, searchQuery="*.gtf", strand=False):
    """Create a file to be used by mikado configure.

    Files matching ``searchQuery`` are searched recursively under
    ``searchPath`` and written one per line as three tab-separated fields:
    file path, file basename and ``str(strand)``.

    out_file: str
        Output file name; ".txt" is appended to it.
    out_dir: str
        Path to the output directory; created if it does not exist.
    searchPath: str
        Path where gtf/gff files will be searched.
    searchQuery: str
        Query to perform the search. Default: "*.gtf"
    strand: bool
        Stranded flag written as the third column. Default: False

    Returns the path of the written list file.
    """
    files = pe.find_files(searchPath, searchQuery, recursive=True)

    # create out dir
    if not pu.check_paths_exist(out_dir):
        pu.mkdir(out_dir)
    outFilePath = os.path.join(out_dir, out_file + ".txt")

    gtfs = []
    for gtf_path in files:
        thisName = pu.get_file_basename(gtf_path)
        # entries for which no basename is returned are left out of the list
        if thisName:
            gtfs.append("\t".join([gtf_path, thisName, str(strand)]))

    # the context manager closes the handle even if write() raises
    with open(outFilePath, "w") as f:
        f.write("\n".join(gtfs))

    pu.print_green("Mikado list file written to:" + outFilePath)
    return outFilePath
def download_fastq(self, verbose=False, quiet=False, logs=True, procs=2, **kwargs):
    """Download fastq files for this accession using fasterq-dump.

    Returns True straight away when ``self.fastqFilesExistsLocally()``
    reports the files are present. Otherwise a ``fasterq-dump`` command is
    built and run through ``pe.execute_command``, and the expected output
    files are checked and recorded on this object (``localfastqPath`` for
    SINGLE layout, ``localfastq1Path``/``localfastq2Path`` for PAIRED).

    verbose, quiet, logs: bool
        Accepted by the signature but not referenced in this method.
    procs: int
        Value passed to fasterq-dump with ``-e``. Default: 2
    kwargs: dict
        Extra fasterq-dump options, passed through ``pu.parse_unix_args``
        together with the list of recognised option names.

    Returns True if the fastq files are available, otherwise False.
    """
    # check if fastq files exists already
    if self.fastqFilesExistsLocally():
        pu.print_green("Fastq files exist already")
        return True

    fasterqdumpArgsList = [
        '-f', '-t', '-s', '-N', '-X', '-a', '-p', '-c', '-o', '-O', '-h',
        '-V', '-L', '-v', '-q', '-b', '-m', '-x', '-S', '-3', '-P', '-M',
        '-B', '--option-file', '--strict', '--table',
        '--include-technical', '--skip-technical', '--concatenate-reads'
    ]

    fstrqd_Cmd = ['fasterq-dump']
    fstrqd_Cmd.extend(pu.parse_unix_args(fasterqdumpArgsList, kwargs))
    # add location
    fstrqd_Cmd.extend(['-O', self.location])
    # add output filename. output will be <srr_accession>.fastq or
    # <srr_accession>_1.fastq and <srr_accession>_2.fastq
    fstrqd_Cmd.extend(['-o', self.srr_accession + ".fastq"])
    fstrqd_Cmd.extend(['-e', str(procs)])

    # prefer the local .sra file when there is one, else use the accession
    if self.sraFileExistsLocally():
        fstrqd_Cmd.append(self.localSRAFilePath)
    else:
        fstrqd_Cmd.append(self.srr_accession)

    # execute command
    cmdStatus = pe.execute_command(fstrqd_Cmd, objectid=self.srr_accession)
    if not cmdStatus:
        print("fasterqdump failed for:" + self.srr_accession)
        return False

    # layout not known yet: infer it from the number of files produced
    if not hasattr(self, 'layout'):
        fq_files = pe.find_files(self.location, self.srr_accession + "*.fastq")
        if len(fq_files) == 1:
            self.layout = 'SINGLE'
        else:
            self.layout = 'PAIRED'

    # check if fastq files are downloaded
    if self.layout == "SINGLE":
        self.localfastqPath = os.path.join(self.location,
                                           self.srr_accession + ".fastq")
        if not pu.check_files_exist(self.localfastqPath):
            pu.print_boldred("Error running fasterq-dump file. File " +
                             self.localfastqPath + " does not exist!!!")
            return False
    else:
        self.localfastq1Path = os.path.join(
            self.location, self.srr_accession + "_1.fastq")
        self.localfastq2Path = os.path.join(
            self.location, self.srr_accession + "_2.fastq")
        if not pu.check_files_exist(self.localfastq1Path,
                                    self.localfastq2Path):
            # either mate may be the missing one, so name both in the message
            pu.print_boldred("Error running fasterq-dump file. File " +
                             self.localfastq1Path + " or " +
                             self.localfastq2Path + " does not exist!!!")
            return False

    return True