def ProcessExperimentSeparate(experiment_id, json, batch_dir, sample_dir_id, preserve, failed_accession, skip_files=False):
    """
    Process every run of an experiment as its own sample directory.

    :param experiment_id: Experiment accession, or the accession of one of its
                          runs (then only that run is processed)
    :param json: Forwarded unchanged to ExtractExperimentMetadata
    :param batch_dir: Batch directory containing the sample directories
    :param sample_dir_id: Name (counter) of the next sample directory
    :param preserve: If true, fastq files already present in the sample
                     directory are reused instead of being downloaded again
    :param failed_accession: List to which failed accessions are appended
    :param skip_files: If true, no run files are downloaded
    :return: The sample directory id to use for the next sample
    """
    m = ExtractExperimentMetadata(experiment_id, json)
    if not m.valid_metadata():
        _logger.error("Metadata Invalid! (%s) - %s", experiment_id, m.metadata.items())
        failed_accession.append(experiment_id)
        return sample_dir_id
    # Check if a run ID was submitted, and if so only process that
    if experiment_id in m.runIDs: m.runIDs = [experiment_id]
    # Process the runIDs as samples
    _logger.info("Found Following Runs: %s", ', '.join(m.runIDs))
    for runid in m.runIDs:
        with TemporaryDirectory() as tmpdir:
            os.chdir(batch_dir)
            sample_dir = "%s/%s/"%(batch_dir, sample_dir_id)
            # Collect fastq files that are already present in the sample dir
            if os.path.exists(sample_dir):
                sfiles = [x for x in os.listdir(sample_dir)
                          if 'fq' in x or 'fastq' in x]
            else:
                sfiles = []
            # Download only when files are wanted at all, and either existing
            # files must not be preserved or there are none to preserve.
            # (The previous "or"-chain was always true for skip_files=False
            # and still downloaded for skip_files=True without preserve.)
            if not skip_files and (not preserve or not sfiles):
                sfiles = DownloadRunFiles(runid, tmpdir)
            if sfiles is None:
                _logger.error("Files could not be retrieved! (%s)", runid)
                failed_accession.append(runid)
            elif CreateSampleDir(sfiles, m, sample_dir, preserve, skip_files):
                # Only advance to the next directory name on success
                sample_dir_id += 1
            else:
                failed_accession.append(runid)
    return sample_dir_id
def CreateSampleDir(sfiles, m, sample_dir, preserve=False, skip_files=False):
    """
    Create or refresh a sample directory and write its metadata file.

    :param sfiles: Paths of the sample files
    :param m: Metadata object; its ``metadata["file_names"]`` entry is set and
              ``save_metadata(sample_dir)`` is called on it
    :param sample_dir: Path of the sample directory
    :param preserve: If true, an existing sample directory is left untouched
    :param skip_files: If true, an empty ``sfiles`` is accepted and an existing
                       sample directory is left untouched
    :return: True on success; False when files were required but none were
             given, or when a ValueError occurred while saving the metadata
    """
    sample_dir = str(sample_dir)
    if not skip_files and len(sfiles) == 0:
        _logger.error("Error: No files were found! (%s)", sample_dir)
        return False
    if not os.path.exists(sample_dir):
        _logger.info("Create sample dir: %s", sample_dir)
        # Create 'sample' dir
        os.mkdir(sample_dir)
        # Move files from tmpdir to sample dir
        for sf in sfiles: move(sf, sample_dir)
    elif not preserve and not skip_files:
        # Empty sample directory
        for fn in os.listdir(sample_dir):
            os.unlink("%s/%s"%(sample_dir, fn))
        # Move files from tmpdir to sample dir
        for sf in sfiles: move(sf, sample_dir)
    # Update and create metadata file
    try:
        m.metadata["file_names"] = ' '.join(
            [os.path.basename(sf).replace(' ','_')
                for sf in sfiles
                if not os.path.basename(sf) == 'meta.json']
            )
        m.save_metadata(sample_dir)
    except ValueError as e:
        _logger.error(e)
        return False
    # Callers test the result for truthiness, so success must be explicit
    return True
def download_fastq_from_list(accession_list, output, json, preserve=False, all_runs_as_samples=False, skip_files=False):
    """
    Download the fastq files for every accession listed in a file.

    The batch folder becomes the working directory, and a log file is
    written into it.

    :param accession_list: Path of a text file with one accession per line
    :param output: Name of the batch folder, created below the current
                   working directory if it does not exist yet
    :param json: Forwarded unchanged to ProcessExperiment
    :param preserve: Forwarded unchanged to ProcessExperiment
    :param all_runs_as_samples: Forwarded unchanged to ProcessExperiment
    :param skip_files: Forwarded unchanged to ProcessExperiment
    """
    start_dir = os.getcwd()
    with open(accession_list, 'r') as handle:
        # Setup batch dir and make it the working directory
        batch_dir = "%s/%s/"%(start_dir, output)
        if not os.path.exists(batch_dir):
            os.mkdir(batch_dir)
        os.chdir(batch_dir)
        # Set logging
        _logger.Set(filename="%s/download-acceession-list.log"%batch_dir)
        # Count the lines once for the progress bar, then rewind the file
        total = sum(1 for _ in handle)
        handle.seek(0)
        _logger.info("Number of samples to download: %s", total)
        # Start progress bar
        progress = ProgressBar(
            widgets = [ETA(), ' - ', Percentage(), ' : ', Bar()],
            maxval  = total
        ).start()
        progress.update(0)
        failed_accession = []
        sample_dir_id = 0
        for line_no, line in enumerate(handle):
            accession = line.strip()
            if accession == '':
                continue
            # Determine accession type from the three-letter prefix
            prefix = accession[:3]
            if prefix not in acctypes:
                _logger.error("unknown accession type for '%s'!", accession)
                failed_accession.append(accession)
                continue
            accession_type = acctypes[prefix]
            _logger.info("Acc Found: %s (%s)", accession, accession_type)
            # Studies and samples expand to their experiments; experiments
            # and runs are processed directly
            if accession_type in ['study', 'sample']:
                targets = ExtractExperimentIDs_acc(accession)
            elif accession_type in ['experiment', 'run']:
                targets = [accession]
            else:
                targets = []
            for target in targets:
                sample_dir_id = ProcessExperiment(
                    target, json, batch_dir, sample_dir_id, preserve,
                    failed_accession, all_runs_as_samples, skip_files)
            progress.update(line_no)
        progress.finish()
        if not failed_accession:
            _logger.info("All accessions downloaded succesfully!")
        else:
            _logger.info("The following accessions were not downloaded!")
            _logger.info('\n'.join(failed_accession))
def DownloadRunFiles(runid, tmpdir):
    """
    Download the fastq files of a single run.

    :param runid: Run accession, passed to Sequence as its first argument
    :param tmpdir: Passed to Sequence as its second argument (presumably the
                   download directory -- confirm against Sequence)
    :return: ``Sequence.files`` of the finished download, or None when the
             download flagged an error or raised a ValueError
    """
    # Download run files
    try:
        s = Sequence(runid, tmpdir)
        s.download_fastq()
        if s.error:
            return None
        _logger.info("Downloaded files: %s", ','.join(s.files))
        return s.files
    except ValueError as e:
        # "except X as e" is valid on Python 2.6+ and Python 3 alike
        _logger.error(e)
        return None
Beispiel #5
0
    def download_fastq(self):
        '''
        Download the Fastq files associated with this accession.

        Creates ``self.dir`` if needed and runs the ``self.download`` command.
        Unless the command was terminated by a signal, the absolute paths of
        all files found in ``self.dir`` are stored in ``self.files``.

        On failure ``self.error`` is set to True and a short reason is stored
        in the class-level error table under ``self.accession``.

        :return: None; the outcome is reported via ``self.files`` and
                 ``self.error``
        '''
        import os  # local import: only needed for os.devnull below
        try:
            Path(self.dir).makedirs_p()
            # The command's stdout is never read. Sending it to a pipe could
            # block the child once the pipe buffer fills up, so discard it.
            with open(os.devnull, 'wb') as devnull:
                retcode = call(self.download, stdout=devnull)
        except OSError as e:
            _logger.error('FastQ Failed: %s [%s]', self.accession, e)
            _logger.error('CMD: %s', self.download)
            Sequence.__errors[self.accession] = 'FastQ Failed'
            self.error = True
        else:
            if retcode < 0:
                _logger.error('Child was terminated by signal')
                self.error = True
                # Previously three adjacent literals without spaces
                Sequence.__errors[self.accession] = \
                    'Child was terminated (signal)'
            else:
                # NOTE(review): a positive (non-zero) exit status is also
                # treated as success here -- confirm this is intended.
                _logger.info('Success: %s', self.accession)
                self.files = [
                    f.abspath() for f in Path(self.dir).files()
                ]
                Sequence.__sequence_id += 1
def ProcessExperimentCombined(experiment_id, json, batch_dir, sample_dir_id, preserve, failed_accession, skip_files=False):
    """
    Process all runs of an experiment as one combined sample.

    The files of every run are downloaded and, when more than one run
    delivered files, concatenated position by position into
    ``<experiment_id>[_<n>].combined.<ext>`` files.

    :param experiment_id: Experiment accession, or the accession of one of its
                          runs (then only that run is processed)
    :param json: Forwarded unchanged to ExtractExperimentMetadata
    :param batch_dir: Batch directory containing the sample directories
    :param sample_dir_id: Name (counter) of the next sample directory
    :param preserve: If true and the sample directory already holds fastq
                     files, these are reused and nothing is downloaded
    :param failed_accession: List to which a failed accession is appended
    :param skip_files: If true, no run files are downloaded
    :return: The sample directory id to use for the next sample
    """
    from shutil import copyfileobj  # local import: used for streamed copying
    m = ExtractExperimentMetadata(experiment_id, json)
    if not m.valid_metadata():
        _logger.error("Metadata Invalid! (%s) - %s", experiment_id, m.metadata.items())
        failed_accession.append(experiment_id)
        return sample_dir_id
    # Check if a run ID was submitted, and if so only process that
    if experiment_id in m.runIDs: m.runIDs = [experiment_id]
    # Process the runs as one sample
    _logger.info("Found Following Runs: %s", ', '.join(m.runIDs))
    with TemporaryDirectory() as tmpdir:
        os.chdir(batch_dir)
        sample_dir = "%s/%s/"%(batch_dir, sample_dir_id)
        csfiles = []
        if preserve and os.path.exists(sample_dir):
            csfiles = [x for x in os.listdir(sample_dir)
                       if 'fq' in x or 'fastq' in x]
        if csfiles == [] and not skip_files:
            sfiles = []
            for runid in m.runIDs:
                sf = DownloadRunFiles(runid, tmpdir)
                if sf is not None:
                    sfiles.append(sf)
                else:
                    _logger.error("Run files could not be retrieved! (%s)",
                                  runid)
            _logger.info("Found Following files sets:\n%s\n",
                         '\n'.join([', '.join(sf) for sf in sfiles]))
            # Combine sfiles into one entry
            if len(sfiles) > 1:
                # NOTE(review): zip() stops at the shortest file set, so
                # surplus files of longer sets are not combined -- confirm.
                for file_no, file_set in enumerate(zip(*sfiles)):
                    ext = '.'.join(file_set[0].split('/')[-1].split('.')[1:])
                    if len(sfiles[0]) > 1:
                        new_file = "%s_%s.combined.%s"%(experiment_id,file_no+1, ext)
                    else:
                        new_file = "%s.combined.%s"%(experiment_id, ext)
                    # Relative path: the file is created in the current
                    # working directory (batch_dir, see chdir above).
                    # Binary mode matches the 'rb' readers, and streaming
                    # avoids loading whole fastq files into memory.
                    with open(new_file, 'wb') as nf:
                        for fn in file_set:
                            with open(fn, 'rb') as f:
                                copyfileobj(f, nf)
                    if os.path.exists(new_file):
                        csfiles.append(new_file)
                    else:
                        _logger.error("Combined file creation failed! (%s: %s)",
                                      experiment_id, file_no)
                        break
            elif sfiles and isinstance(sfiles[0], list):
                # Guarded: sfiles is empty when every run download failed
                csfiles = sfiles[0]
            if csfiles == []:
                # The failure itself is recorded once, in the branch below
                _logger.error("Files could not be combined! (%s)",
                              experiment_id)
        if csfiles != [] or skip_files:
            success = CreateSampleDir(csfiles, m, sample_dir, preserve, skip_files)
            if success:
                sample_dir_id += 1
            else:
                failed_accession.append(experiment_id)
        else:
            _logger.error("Files could not be retrieved! (%s)",
                          experiment_id)
            failed_accession.append(experiment_id)
    return sample_dir_id