Ejemplo n.º 1
0
def downloadS3File(in_file, aws):
    """Copy each S3 object in ``in_file`` into the current working directory.

    One ``<aws> s3 cp <source> <cwd>/<basename>`` shell command is run per
    entry through ``utils.logging_call``.

    Args:
        in_file: Iterable of S3 paths; each one is used as the copy source.
        aws: Command used to invoke the AWS CLI. It is inserted into the
            shell command verbatim, so it may carry extra options.

    Returns:
        A list holding the base name of every requested path, in input
        order.
    """
    import shlex  # local import: only needed to build the shell command

    res = []
    for f in in_file:
        local_name = os.path.basename(f)
        # Quote both paths so that spaces or shell metacharacters in the
        # object key or in the working directory cannot split or alter the
        # command. Ordinary paths are left unchanged by shlex.quote.
        cmd = '%s s3 cp %s %s' % (
            aws,
            shlex.quote(f),
            shlex.quote(os.getcwd() + '/' + local_name),
        )
        utils.logging_call(cmd, shell=True)
        res.append(local_name)
    return res
Ejemplo n.º 2
0
def trimAdapters(cutadapt, infile, outfile, adapt3='NA', adapt5='NA'):
    """Build and run a cutadapt adapter-trimming command.

    The trimming mode is chosen from the number of input files and from
    which adapters were supplied:

    * single-end, both adapters: linked trimming (``-a ADAPT5...ADAPT3``)
    * single-end, 5' adapter only: anchored 5' trimming (``-g ^ADAPT5``)
    * single-end, 3' adapter only: regular 3' trimming (``-a ADAPT3``)
    * paired-end, both adapters: ``-a ADAPT3 -A ADAPT5`` with ``-o``/``-p``

    Args:
        cutadapt: Command used to invoke cutadapt (inserted verbatim).
        infile: Sequence of input paths. Exactly one entry is treated as
            single-end data; any other length is treated as paired-end and
            the first two entries are used.
        outfile: Sequence of output paths, indexed like ``infile``.
        adapt3: 3' adapter sequence, or ``'NA'`` when there is none.
        adapt5: 5' adapter sequence, or ``'NA'`` when there is none.

    Returns:
        Whatever ``utils.logging_call`` returns for the trimming command.

    Raises:
        RuntimeError: If no adapter is given, or if the data is paired-end
            and only one of the two adapters is given.
    """
    ## Determine how many adapters there are, and the ended-ness of the data
    is_paired_end = len(infile) != 1
    is_three_prime = adapt3 != 'NA'
    is_five_prime = adapt5 != 'NA'

    ## Check to make sure at least one adapter is present
    if not is_three_prime and not is_five_prime:
        raise RuntimeError('Missing adapters to use for trimming')

    if is_paired_end:
        ## Paired-end trimming is only defined when both adapters are present.
        ## Fail with a clear message instead of reaching the command assembly
        ## below with no trim string.
        if not (is_three_prime and is_five_prime):
            raise RuntimeError(
                'Paired-end trimming requires both adapt3 and adapt5')
        ## NOTE(review): cutadapt documents -A as the 3' adapter of the
        ## second read; adapt5 is passed there unchanged -- confirm this is
        ## the intended pairing.
        trim_string = ('-a ' + adapt3 + ' -A ' + adapt5 + ' -o ' + outfile[0]
                       + ' -p ' + outfile[1] + ' ' + infile[0] + ' '
                       + infile[1])

    ## If both adapters are present and this is single ended data, use linked trimming
    elif is_three_prime and is_five_prime:
        trim_string = ('-a ' + adapt5 + '...' + adapt3 + ' -o ' + outfile[0]
                       + ' ' + infile[0])

    ## If only single ended and 5' adapter, use anchored 5' trimming
    elif is_five_prime:
        trim_string = '-g ^' + adapt5 + ' -o ' + outfile[0] + ' ' + infile[0]

    ## If only single ended and 3' adapter, use default 3' trimming
    else:
        trim_string = '-a ' + adapt3 + ' -o ' + outfile[0] + ' ' + infile[0]

    trim_cmd = cutadapt + ' ' + trim_string + ' -m 15 -e 0.1 -O 5'
    bench_obj = utils.logging_call(trim_cmd, shell=True)

    return bench_obj
Ejemplo n.º 3
0
def s3Upload(file, destination, prog='aws'):
    """Upload one or more local files to S3 with ``<prog> s3 cp``.

    Each file is copied to ``destination`` followed by the path exactly as
    it was given (the full path, not only its base name).

    Args:
        file: A list of paths, or a single string of whitespace-separated
            paths.
        destination: Target S3 prefix; a trailing ``/`` is appended when it
            is missing.
        prog: Command used to invoke the AWS CLI (inserted verbatim).

    Returns:
        True once every upload command has completed.

    Raises:
        ValueError: If ``destination`` is empty.
        SystemExit: With status 1 when an upload command raises
            ``subprocess.CalledProcessError``.
    """
    import shlex  # local import: only needed to build the shell command

    # An empty destination would otherwise turn into the local path "/".
    if not destination:
        raise ValueError('destination must be a non-empty S3 path')
    if not destination.endswith('/'):
        destination += '/'

    # Diagnostic about the argument shape goes to the log, not to stdout.
    logging.debug('s3Upload received %s: %r', type(file).__name__, file)

    if not isinstance(file, list):
        # split() without an argument ignores repeated or surrounding
        # whitespace, so no empty path entries are produced.
        file = file.split()

    for f in file:
        # Quote the paths so spaces or shell metacharacters cannot split or
        # alter the command; ordinary paths are left unchanged.
        cmd = '%s s3 cp %s %s' % (
            prog, shlex.quote(f), shlex.quote(destination + f))

        ## Attempt to upload
        try:
            utils.logging_call(cmd, shell=True)
        except subprocess.CalledProcessError:
            logging.error('S3 upload failed. See above for more details.')
            # Same effect as exit(1) without depending on the site module.
            raise SystemExit(1)

    return True
Ejemplo n.º 4
0
def downloadSRAFile(accession, fastq_dump):
    """Fetch SRA runs with ``prefetch`` and convert them to gzipped FASTQ.

    For every accession a single shell pipeline is run through
    ``utils.logging_call``: ``prefetch`` downloads ``<accession>.sra`` into
    the current working directory, ``fastq_dump`` converts it with
    ``--gzip`` into the same directory, and the ``.sra`` file is removed.
    The pipeline is attempted up to three times.

    Args:
        accession: Iterable of accession identifiers.
        fastq_dump: Command used to invoke fastq-dump (inserted verbatim).

    Returns:
        A list of ``<cwd>/<accession>.fastq.gz`` paths, in input order.

    Raises:
        subprocess.CalledProcessError: If the third attempt for an
            accession also fails.
    """
    import shlex  # local import: only needed to build the shell command

    # The working directory is the same for every accession; look it up once.
    cwd = os.getcwd()
    quoted_cwd = shlex.quote(cwd)

    res = []
    for f in accession:
        # Quote every path so a working directory containing spaces (or an
        # unusual accession string) cannot split or alter the pipeline.
        sra_path = shlex.quote(cwd + '/' + f + '.sra')

        ## Download with prefetch, extract fastq.gz, remove downloaded file.
        ## prefetch is more reliable than fastq-dump to retrieve the sra file.
        cmd = ('prefetch -v ' + shlex.quote(f) + ' -O ' + quoted_cwd
               + ' && ' + fastq_dump + ' ' + sra_path
               + ' --gzip -O ' + quoted_cwd
               + ' && rm ' + sra_path)

        ## If the command fails on the first try, give it up to 2 more tries.
        ## Any non-zero exit is retried; the message labels it a timeout.
        for attempt in range(3):
            try:
                utils.logging_call(cmd, shell=True)
            except subprocess.CalledProcessError as e:
                print('Timeout: %s' % (e.cmd,))
                if attempt == 2:
                    # Bare raise keeps the original traceback.
                    raise
            else:
                break

        res.append(cwd + '/' + f + '.fastq.gz')
    return res
Ejemplo n.º 5
0
def s3Upload(file, destination, prog='aws'):
    """Upload local files to S3 with ``<prog> s3 cp`` and delete them locally.

    Each file is copied to ``destination`` followed by the path exactly as
    it was given, and is then removed with ``rm``. The original comments
    describe the removed file as an encrypted copy.

    Args:
        file: A list of paths, or a single string of whitespace-separated
            paths.
        destination: Target S3 prefix; a trailing ``/`` is appended when it
            is missing.
        prog: Command used to invoke the AWS CLI (inserted verbatim).

    Returns:
        True once every file has been uploaded and removed.

    Raises:
        ValueError: If ``destination`` is empty.
        SystemExit: With status 1 when the upload or the removal command
            raises ``subprocess.CalledProcessError``.
    """
    import shlex  # local import: only needed to build the shell commands

    # An empty destination would otherwise turn into the local path "/".
    if not destination:
        raise ValueError('destination must be a non-empty S3 path')
    if not destination.endswith('/'):
        destination += '/'

    if not isinstance(file, list):
        # split() without an argument ignores repeated or surrounding
        # whitespace, so no empty path entries are produced.
        file = file.split()

    for f in file:
        # Quote the paths so spaces or shell metacharacters cannot split or
        # alter the commands; ordinary paths are left unchanged.
        quoted_f = shlex.quote(f)
        cmd = '%s s3 cp %s %s' % (prog, quoted_f, shlex.quote(destination + f))
        ## command to remove encrypted file
        cmd_rm = 'rm %s' % (quoted_f)

        ## Attempt to upload
        try:
            utils.logging_call(cmd, shell=True)
        except subprocess.CalledProcessError:
            logging.error('S3 upload failed. See above for more details.')
            # Same effect as exit(1) without depending on the site module.
            raise SystemExit(1)

        ## Remove the local file only after a successful upload. A failure
        ## here is reported separately so it is not mistaken for an upload
        ## failure.
        try:
            utils.logging_call(cmd_rm, shell=True)
        except subprocess.CalledProcessError:
            logging.error(
                'Removing %s after S3 upload failed. '
                'See above for more details.', f)
            raise SystemExit(1)
    return True