Esempio n. 1
0
    def createMikadoGTFlist(self,
                            out_file,
                            out_dir,
                            searchPath,
                            searchQuery="*.gtf",
                            strand=False):
        """Create the list file to be used by mikado configure.

        Every file returned by pe.find_files(searchPath, searchQuery) that has
        a non-empty basename contributes one line holding the file path, its
        basename and str(strand), separated by tabs.

        out_file: str
            Name of the list file; ".txt" is appended to it
        out_dir: str
            Directory the list file is written to; pu.mkdir is called on it
            when pu.check_paths_exist reports it missing
        searchPath: str
            Path handed to pe.find_files
        searchQuery: str
            Query handed to pe.find_files. Default: "*.gtf"
        strand: bool
            Value written in the third column. Default: False

        Returns the path of the written list file.
        """

        gtf_paths = pe.find_files(searchPath, searchQuery)

        #create out dir
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
        outFilePath = os.path.join(out_dir, out_file + ".txt")

        rows = []
        for gtf_path in gtf_paths:
            label = pu.get_file_basename(gtf_path)
            # files without a usable basename are left out of the list
            if label:
                rows.append("\t".join([gtf_path, label, str(strand)]))

        # the context manager closes the handle even if the write fails
        with open(outFilePath, "w") as handle:
            handle.write("\n".join(rows))

        pu.print_green("Mikado list file written to:" + outFilePath)
        return outFilePath
Esempio n. 2
0
    def search_fastq(self, path):
        """Look for .fastq files under a directory and record them on self.

        One file sets self.localfastqPath and layout "SINGLE"; two files set
        self.localfastq1Path / self.localfastq2Path (in sorted order) and
        layout "PAIRED".

        Return True if one or two files were found, otherwise False.
        """
        found = pe.find_files(path, "*.fastq")
        count = len(found)

        # nothing to work with
        if count < 1:
            return False

        # more than two candidates is ambiguous
        if count > 2:
            pu.print_boldred("Can not determine .fastq. Exiting...")
            return False

        found.sort()
        if count == 1:
            # single-end: one fastq file
            self.localfastqPath = found[0]
            pu.print_green("Found .fastq " + self.localfastqPath)
            self.layout = "SINGLE"
        else:
            # paired-end: exactly two fastq files remain at this point
            self.localfastq1Path = found[0]
            self.localfastq2Path = found[1]
            message = ("Found .fastq " + self.localfastq1Path + " " +
                       self.localfastq2Path)
            pu.print_green(message)
            self.layout = "PAIRED"

        return True
Esempio n. 3
0
    def search_sra(self, path):
        """Look for a .sra file under a directory and record it on self.

        Sets self.srr_accession (basename of the file), self.localSRAFilePath,
        self.sraFileSize and self.layout ("PAIRED" or "SINGLE", as reported by
        pe.is_paired). When several files match, the first entry is used.

        Return True if a file was found, otherwise False.
        """
        matches = pe.find_files(path, "*.sra")

        if len(matches) == 0:
            return False

        if len(matches) > 1:
            pu.print_boldred(
                "Found multiple .sra files. Using the first entry...")

        chosen = matches[0]
        self.srr_accession = pu.get_file_basename(chosen)
        self.localSRAFilePath = chosen
        self.sraFileSize = pu.get_file_size(self.localSRAFilePath)

        # decide between paired-end and single-end from the file itself
        self.layout = ("PAIRED"
                       if pe.is_paired(self.localSRAFilePath) else "SINGLE")

        pu.print_green("Found .sra " + self.localSRAFilePath)
        return True
Esempio n. 4
0
    def createMikadoGTFlist(self,
                            out_file,
                            out_dir,
                            searchPath,
                            searchQuery="*.gtf",
                            strand=False):
        """Create the list file to be used by mikado configure.

        Every file returned by the recursive search that has a non-empty
        basename contributes one line holding the file path, its basename and
        str(strand), separated by tabs.

        out_file: str
            outfile name; ".txt" is appended to it
        out_dir: str
            path to out_dir; pu.mkdir is called on it when
            pu.check_paths_exist reports it missing
        searchPath: str
            Path where gtf/gff files will be searched (recursive=True is
            passed to pe.find_files)
        searchQuery: str
            Query to perform search. Default: "*.gtf"
        strand: bool
            Stranded flag written in the third column. Default: False

        Returns the path of the written list file.
        """

        gtf_paths = pe.find_files(searchPath, searchQuery, recursive=True)

        #create out dir
        if not pu.check_paths_exist(out_dir):
            pu.mkdir(out_dir)
        outFilePath = os.path.join(out_dir, out_file + ".txt")

        rows = []
        for gtf_path in gtf_paths:
            label = pu.get_file_basename(gtf_path)
            # files without a usable basename are left out of the list
            if label:
                rows.append("\t".join([gtf_path, label, str(strand)]))

        # the context manager closes the handle even if the write fails
        with open(outFilePath, "w") as handle:
            handle.write("\n".join(rows))

        pu.print_green("Mikado list file written to:" + outFilePath)
        return outFilePath
Esempio n. 5
0
    def download_fastq(self,
                       verbose=False,
                       quiet=False,
                       logs=True,
                       procs=2,
                       **kwargs):
        """Function to download fastq files by running fasterq-dump.

        Does nothing and returns True when self.fastqFilesExistsLocally()
        already reports the files. Otherwise builds a fasterq-dump command,
        runs it through pe.execute_command and checks that the expected
        output files exist under self.location.

        verbose: bool
            Accepted but not used in this function
        quiet: bool
            Accepted but not used in this function
        logs: bool
            Accepted but not used in this function
        procs: int
            Value passed to fasterq-dump with the -e option. Default: 2
        kwargs: dict
            Extra fasterq-dump options; filtered through pu.parse_unix_args
            against the list of options accepted below

        Returns True on success; False if the command fails or an expected
        fastq file is missing afterwards.
        """

        #check if fastq files exists already
        if self.fastqFilesExistsLocally():
            pu.print_green("Fastq files exist already")
            return True

        fasterqdumpArgsList = [
            '-f', '-t', '-s', '-N', '-X', '-a', '-p', '-c', '-o', '-O', '-h',
            '-V', '-L', '-v', '-q', '-b', '-m', '-x', '-S', '-3', '-P', '-M',
            '-B', '--option-file', '--strict', '--table',
            '--include-technical', '--skip-technical', '--concatenate-reads'
        ]
        fstrqd_Cmd = ['fasterq-dump']
        fstrqd_Cmd.extend(pu.parse_unix_args(fasterqdumpArgsList, kwargs))
        #add location
        fstrqd_Cmd.extend(['-O', self.location])
        #add output filename. output will be <srr_accession>.fastq or <srr_accession>_1.fastq and <srr_accession>_2.fastq
        fstrqd_Cmd.extend(['-o', self.srr_accession + ".fastq"])
        fstrqd_Cmd.extend(['-e', str(procs)])
        # use the local .sra file when there is one, else the accession
        if self.sraFileExistsLocally():
            fstrqd_Cmd.append(self.localSRAFilePath)
        else:
            fstrqd_Cmd.append(self.srr_accession)

        #execute command
        cmdStatus = pe.execute_command(fstrqd_Cmd, objectid=self.srr_accession)
        if not cmdStatus:
            # reported the same way as the other failures in this function
            pu.print_boldred("fasterqdump failed for:" + self.srr_accession)
            return False

        # layout may not be known yet; infer it from the files produced
        if not hasattr(self, 'layout'):
            fq_files = pe.find_files(self.location,
                                     self.srr_accession + "*.fastq")
            if len(fq_files) == 1:
                self.layout = 'SINGLE'
            else:
                self.layout = 'PAIRED'

        #check if fastq files are downloaded
        if self.layout == "SINGLE":
            self.localfastqPath = os.path.join(self.location,
                                               self.srr_accession + ".fastq")
            if not pu.check_files_exist(self.localfastqPath):
                pu.print_boldred("Error running fasterq-dump file. File " +
                                 self.localfastqPath + " does not exist!!!")
                return False
        else:
            self.localfastq1Path = os.path.join(
                self.location, self.srr_accession + "_1.fastq")
            self.localfastq2Path = os.path.join(
                self.location, self.srr_accession + "_2.fastq")
            if not pu.check_files_exist(self.localfastq1Path,
                                        self.localfastq2Path):
                # name the file(s) actually missing instead of always
                # blaming the first mate
                for fq_path in (self.localfastq1Path, self.localfastq2Path):
                    if not pu.check_files_exist(fq_path):
                        pu.print_boldred(
                            "Error running fasterq-dump file. File " +
                            fq_path + " does not exist!!!")
                return False

        return True