def get_lane_input_files(self):
    """Find this run's raw-data tarballs and store their DNAnexus file IDs.

    Three glob searches are made in the '/raw_data' folder of
    ``self.project_id``; the 'id' of each result is stored on
    ``self.metadata_tar_id``, ``self.lane_tar_id`` and
    ``self.interop_tar_id`` respectively.  Every search is issued with
    ``zero_ok=False`` and ``more_ok=True``.
    """
    metadata_glob = '%s.metadata.tar*' % self.run_name
    # Options shared by all three lookups; only the name glob differs.
    shared_options = dict(classname='file',
                          name_mode='glob',
                          project=self.project_id,
                          folder='/raw_data',
                          zero_ok=False,
                          more_ok=True)

    metadata_match = dxpy.find_one_data_object(name=metadata_glob,
                                               **shared_options)
    self.metadata_tar_id = metadata_match['id']

    lane_glob = '%s_L%d.tar*' % (self.run_name, self.lane_index)
    lane_match = dxpy.find_one_data_object(name=lane_glob, **shared_options)
    self.lane_tar_id = lane_match['id']

    interop_glob = '%s.InterOp.tar*' % (self.run_name)
    interop_match = dxpy.find_one_data_object(name=interop_glob,
                                              **shared_options)
    self.interop_tar_id = interop_match['id']
Esempio n. 2
0
    def test_paired_with_contam(self):
        """Launch ``self.program`` on paired mappings with a contaminant set.

        Looks up the four required inputs by name.  If any of them cannot be
        found, prints which one is missing and returns False without
        launching anything.  Otherwise runs the program and prints the ID of
        the launched job.

        Returns:
            False when an input is missing; None after a successful launch.
        """
        # zero_ok=True makes a missing object come back as None instead of
        # raising DXSearchError, so the "Please upload" messages below are
        # actually reachable (indexing ['id'] straight away never was).
        bed_file = dxpy.find_one_data_object(
            name="hg19_GRCh37_Feb2009_RefSeq.bed", zero_ok=True)
        mappings = dxpy.find_one_data_object(
            name="SRR018256_paired_RNA_Mappings",
            typename="LetterMappings", zero_ok=True)
        contam_contig = dxpy.find_one_data_object(
            name="human rRNA", typename="ContigSet", zero_ok=True)
        reads = dxpy.find_one_data_object(
            name="SRR018256_reads", typename="LetterReads", zero_ok=True)

        if bed_file is None:
            print("Cannot find hg19_GRCh37_Feb2009_RefSeq.bed.  Please upload it")
            return False
        if mappings is None:
            print("Cannot find Mappings.  Please upload them")
            return False
        if contam_contig is None:
            print("Cannot find human rRNA.  Please upload it")
            return False
        if reads is None:
            print("Cannot find SRR018256_reads.  Please upload it")
            return False

        # Named program_input so the builtin input() is not shadowed.
        program_input = {
            'rna_seq_mappings': dxpy.dxlink(mappings['id']),
            'bed_file': dxpy.dxlink(bed_file['id']),
            'contaminants': [dxpy.dxlink(contam_contig['id'])],
            'original_reads': [dxpy.dxlink(reads['id'])]
        }

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Running program with %s" % (program_input,))
        job = self.program.run(program_input)
        print("launched test_paired_with_contam  %s" % (job.get_id(),))
Esempio n. 3
0
    def test_paired_with_contam(self):
        """Launch ``self.program`` on paired mappings with a contaminant set.

        Looks up the four required inputs by name.  If any of them cannot be
        found, prints which one is missing and returns False without
        launching anything.  Otherwise runs the program and prints the ID of
        the launched job.

        Returns:
            False when an input is missing; None after a successful launch.
        """
        # With the default zero_ok=False a missing object raises
        # DXSearchError, which made the None checks below unreachable.
        # zero_ok=True returns None instead, so they now work as intended.
        bed_file = dxpy.find_one_data_object(
            name="hg19_GRCh37_Feb2009_RefSeq.bed", zero_ok=True)
        mappings = dxpy.find_one_data_object(
            name="SRR018256_paired_RNA_Mappings", typename="LetterMappings",
            zero_ok=True)
        contam_contig = dxpy.find_one_data_object(
            name="human rRNA", typename="ContigSet", zero_ok=True)
        reads = dxpy.find_one_data_object(
            name="SRR018256_reads", typename="LetterReads", zero_ok=True)

        if bed_file is None:
            print("Cannot find hg19_GRCh37_Feb2009_RefSeq.bed.  Please upload it")
            return False
        if mappings is None:
            print("Cannot find Mappings.  Please upload them")
            return False
        if contam_contig is None:
            print("Cannot find human rRNA.  Please upload it")
            return False
        if reads is None:
            print("Cannot find SRR018256_reads.  Please upload it")
            return False

        # Named program_input so the builtin input() is not shadowed.
        program_input = {'rna_seq_mappings': dxpy.dxlink(mappings['id']),
                         'bed_file': dxpy.dxlink(bed_file['id']),
                         'contaminants': [dxpy.dxlink(contam_contig['id'])],
                         'original_reads': [dxpy.dxlink(reads['id'])]}

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Running program with %s" % (program_input,))
        job = self.program.run(program_input)
        print("launched test_paired_with_contam  %s" % (job.get_id(),))
Esempio n. 4
0
def resolve_file(identifier):
    """Resolve a file identifier to a DNAnexus file handler.

    The identifier may be ``project:path`` or just a path/name; without a
    project part, ``dxpy.WORKSPACE_ID`` is used.  A path with a folder part
    is searched non-recursively in that folder; a bare name is searched
    recursively from '/'.  If that search fails, the whole identifier is
    tried as a file ID and finally passed to ``resolve_accession``.

    Args:
        identifier: the identifier string; a falsy value short-circuits.

    Returns:
        The file handler, or None when the identifier is empty or nothing
        could be resolved.
    """
    logging.debug("resolve_file: %s", identifier)

    if not identifier:
        return None

    m = re.match(r'''^([\w\-\ \.]+):([\w\-\ /\.]+)''', identifier)
    if m:
        project_identifier = m.group(1)
        file_identifier = m.group(2)
    else:
        logging.debug("Defaulting to the current project")
        project_identifier = dxpy.WORKSPACE_ID
        file_identifier = identifier

    project = resolve_project(project_identifier)
    logging.debug("Got project %s", project.name)
    logging.debug("Now looking for file %s", file_identifier)

    m = re.match(r'''(^[\w\-\ /\.]+)/([\w\-\ \.]+)''', file_identifier)
    if m:
        folder_name = m.group(1)
        if not folder_name.startswith('/'):
            folder_name = '/' + folder_name
        file_name = m.group(2)
    else:
        folder_name = None
        file_name = file_identifier

    logging.debug("Looking for file %s in folder %s", file_name, folder_name)

    try:
        # An explicit folder is searched on its own; with no folder the
        # whole project is searched recursively from the root.
        file_handler = dxpy.find_one_data_object(
            name=file_name,
            project=project.get_id(),
            folder=folder_name if folder_name else '/',
            recurse=not folder_name,
            more_ok=False,
            zero_ok=False,
            return_handler=True)
    except dxpy.DXSearchError:
        logging.debug('%s not found in project %s folder %s.  Trying as file ID',
                      file_name, project.get_id(), folder_name)
        # "except Exception" rather than a bare except, so KeyboardInterrupt
        # and SystemExit are not silently swallowed by the fallbacks.
        try:
            file_handler = dxpy.DXFile(dxid=identifier, mode='r')
        except Exception:
            logging.debug('%s not found as a dxid', identifier)
            try:
                file_handler = resolve_accession(identifier)
            except Exception:
                logging.debug('%s not found as an accession', identifier)
                logging.warning('Could not find file %s.', identifier)
                return None

    logging.info("Resolved file identifier %s to %s",
                 identifier, file_handler.get_id())
    return file_handler
Esempio n. 5
0
def spp(experiment, control, xcor_scores, chrom_sizes, spp_version,
        bigbed=False, as_file=None, name="spp", prefix=None,
        fragment_length=None, spp_instance=None):
    """Run the 'spp' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required).  Optional inputs are only passed along when
    set: ``fragment_length`` when not None, ``as_file`` only together with
    ``bigbed``, and ``prefix`` when truthy.  ``spp_instance``, when truthy,
    is forwarded to ``run`` as ``instance_type``.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(classname='applet',
                                       name='spp',
                                       project=dxpy.PROJECT_CONTEXT_ID,
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)

    applet_input = {
        "experiment": experiment,
        "control": control,
        "xcor_scores_input": xcor_scores,
        "bigbed": bigbed,
        "chrom_sizes": chrom_sizes,
        "spp_version": spp_version,
    }
    if fragment_length is not None:
        applet_input["fragment_length"] = fragment_length
    if bigbed and as_file:
        applet_input["as_file"] = as_file
    if prefix:
        applet_input["prefix"] = prefix

    run_options = {'name': name}
    if spp_instance:
        run_options['instance_type'] = spp_instance

    return applet.run(applet_input, **run_options)
Esempio n. 6
0
def resolve_accession(accession):
    """Resolve an ENCODE file accession to a DNAnexus file handler.

    The accession must start with ``ENCFF`` followed by three digits and
    three upper-case letters.  The file is searched for recursively, by the
    glob ``<accession>*``, in the ENCODE snapshot project.  That project is
    resolved once and kept in the module-level ``snapshot_project`` global.

    Args:
        accession: the accession string.

    Returns:
        The file handler returned by ``dxpy.find_one_data_object``.

    Raises:
        ValueError: if the accession is malformed, the snapshot project
            cannot be resolved, or the file search fails.
    """
    # Declared before first use: a ``global`` statement that follows a use
    # of the name is a SyntaxError on Python 3.
    global snapshot_project

    logging.debug("Looking for accession %s", accession)

    if not re.match(r'''^ENCFF\d{3}[A-Z]{3}''', accession):
        logging.debug("%s is not a valid accession format", accession)
        raise ValueError(accession)

    DNANEXUS_ENCODE_SNAPSHOT = 'ENCODE-SDSC-snapshot-20140505'
    logging.debug('Testing')

    try:
        # Bare name lookup: raises NameError until the cache is populated.
        snapshot_project
    except NameError:
        logging.debug('Looking for snapshot project %s', DNANEXUS_ENCODE_SNAPSHOT)
        try:
            snapshot_project = resolve_project(DNANEXUS_ENCODE_SNAPSHOT)
        except Exception:
            logging.error("Cannot find snapshot project %s", DNANEXUS_ENCODE_SNAPSHOT)
            raise ValueError(DNANEXUS_ENCODE_SNAPSHOT)
        logging.debug('Found snapshot project %s', snapshot_project.name)

    try:
        accession_search = accession + '*'
        logging.debug('Looking recursively for %s in %s',
                      accession_search, snapshot_project.name)
        file_handler = dxpy.find_one_data_object(
            name=accession_search, name_mode='glob', more_ok=False,
            classname='file', recurse=True, return_handler=True,
            folder='/', project=snapshot_project.get_id())
        logging.debug('Got file handler for %s', file_handler.name)
        return file_handler
    except Exception:
        # Any lookup failure is reported to the caller as ValueError.
        logging.error("Cannot find accession %s in project %s",
                      accession, snapshot_project.name)
        raise ValueError(accession)
Esempio n. 7
0
    def test_unpaired(self):
        """Launch ``self.program`` on unpaired RNA-Seq mappings.

        Looks up the BED file and the mappings by name.  If either cannot be
        found, prints which one is missing and returns False without
        launching anything.  Otherwise runs the program and prints the ID of
        the launched job.

        Returns:
            False when an input is missing; None after a successful launch.
        """
        # zero_ok=True returns None for a missing object instead of raising
        # DXSearchError, which is what makes the checks below reachable.
        bed_file = dxpy.find_one_data_object(
            name="hg19_GRCh37_Feb2009_RefSeq.bed", zero_ok=True)
        mappings = dxpy.find_one_data_object(
            name="unpaired_RNA-Seq_mappings", typename="LetterMappings",
            zero_ok=True)

        if bed_file is None:
            print("Cannot find hg19_GRCh37_Feb2009_RefSeq.bed.  Please upload it")
            return False
        if mappings is None:
            print("Cannot find unpaired_RNA-Seq_mappings.  Please upload it")
            return False

        # Named program_input so the builtin input() is not shadowed.
        program_input = {'rna_seq_mappings': dxpy.dxlink(mappings['id']),
                         'bed_file': dxpy.dxlink(bed_file['id'])}

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Running program with %s" % (program_input,))
        job = self.program.run(program_input)
        print("launched test_unpaired  %s" % (job.get_id(),))
Esempio n. 8
0
def macs2(experiment,
          control,
          xcor_scores,
          chrom_sizes,
          narrowpeak_as,
          gappedpeak_as,
          broadpeak_as,
          genomesize,
          prefix=None):
    """Run the 'macs2' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required).  ``prefix`` is only passed to the applet when
    it is truthy.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(
        classname='applet',
        name='macs2',
        project=dxpy.PROJECT_CONTEXT_ID,
        zero_ok=False,
        more_ok=False,
        return_handler=True)

    applet_input = dict([
        ("experiment", experiment),
        ("control", control),
        ("xcor_scores_input", xcor_scores),
        ("chrom_sizes", chrom_sizes),
        ("narrowpeak_as", narrowpeak_as),
        ("gappedpeak_as", gappedpeak_as),
        ("broadpeak_as", broadpeak_as),
        ("genomesize", genomesize),
    ])
    if prefix:
        applet_input['prefix'] = prefix

    return applet.run(applet_input)
 def download_qc_report(self, download_dir):
     """
     Fetches the QC report PDF from the DNAnexus sequencing results project.

     The report is found by the glob "*_QC_Report.pdf" in
     self.DX_QC_REPORT_FOLDER of project self.dx_project_id and is saved
     under its own name inside download_dir, which is created if missing.

     Args:
         download_dir: `str` - The local directory path to download the QC report to.

     Returns:
         `str`. The filepath to the downloaded QC report.
     """
     if not os.path.isdir(download_dir):
         os.makedirs(download_dir)

     # The search result is a dict with 'project' and 'id' keys, e.g.
     # {u'project': u'project-...', u'id': u'file-...'}.
     # dxpy.find_one_data_object() raises dxpy.exceptions.DXSearchError
     # when nothing is found.
     search_hit = dxpy.find_one_data_object(
         project=self.dx_project_id,
         folder=self.DX_QC_REPORT_FOLDER,
         name="*_QC_Report.pdf",
         name_mode="glob")
     report = dxpy.DXFile(dxid=search_hit["id"], project=search_hit["project"])

     local_path = os.path.join(download_dir, report.name)
     transfer_desc = "{filename} to {download_dir}.".format(
         filename=report.name, download_dir=download_dir)

     debug_logger.debug("Downloading " + transfer_desc)
     dxpy.bindings.dxfile_functions.download_dxfile(dxid=report,
                                                    filename=local_path)
     success_logger.info("Downloaded " + transfer_desc)
     return local_path
def resolve_accession(accession):
    """Resolve an ENCODE file accession to a DNAnexus file handler.

    The accession must start with ``ENCFF`` followed by three digits and
    three upper-case letters.  The file is searched for recursively, by the
    glob ``<accession>*``, in the ENCODE snapshot project.  That project is
    resolved once and kept in the module-level ``snapshot_project`` global.

    Args:
        accession: the accession string.

    Returns:
        The file handler returned by ``dxpy.find_one_data_object``.

    Raises:
        ValueError: if the accession is malformed, the snapshot project
            cannot be resolved, or the file search fails.
    """
    # Declared before first use: a ``global`` statement that follows a use
    # of the name is a SyntaxError on Python 3.
    global snapshot_project

    logging.debug("Looking for accession %s", accession)

    if not re.match(r'''^ENCFF\d{3}[A-Z]{3}''', accession):
        logging.debug("%s is not a valid accession format", accession)
        raise ValueError(accession)

    DNANEXUS_ENCODE_SNAPSHOT = 'ENCODE-SDSC-snapshot-20140505'
    logging.debug('Testing')

    try:
        # Bare name lookup: raises NameError until the cache is populated.
        snapshot_project
    except NameError:
        logging.debug('Looking for snapshot project %s', DNANEXUS_ENCODE_SNAPSHOT)
        try:
            snapshot_project = resolve_project(DNANEXUS_ENCODE_SNAPSHOT)
        except Exception:
            logging.error("Cannot find snapshot project %s", DNANEXUS_ENCODE_SNAPSHOT)
            raise ValueError(DNANEXUS_ENCODE_SNAPSHOT)
        logging.debug('Found snapshot project %s', snapshot_project.name)

    try:
        accession_search = accession + '*'
        logging.debug('Looking recursively for %s in %s',
                      accession_search, snapshot_project.name)
        file_handler = dxpy.find_one_data_object(
            name=accession_search, name_mode='glob', more_ok=False,
            classname='file', recurse=True, return_handler=True,
            folder='/', project=snapshot_project.get_id())
        logging.debug('Got file handler for %s', file_handler.name)
        return file_handler
    except Exception:
        # Any lookup failure is reported to the caller as ValueError.
        logging.error("Cannot find accession %s in project %s",
                      accession, snapshot_project.name)
        raise ValueError(accession)
Esempio n. 11
0
def spp(experiment,
        control,
        xcor_scores,
        chrom_sizes,
        spp_version,
        bigbed=False,
        as_file=None,
        name="spp",
        prefix=None,
        fragment_length=None):
    """Run the 'spp' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required).  ``fragment_length`` is passed when not None,
    ``as_file`` only together with ``bigbed``, and ``prefix`` when truthy.
    ``name`` is forwarded to the ``run`` call.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(classname='applet',
                                       name='spp',
                                       project=dxpy.PROJECT_CONTEXT_ID,
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)

    applet_input = {"experiment": experiment,
                    "control": control,
                    "xcor_scores_input": xcor_scores,
                    "bigbed": bigbed,
                    "chrom_sizes": chrom_sizes,
                    "spp_version": spp_version}

    if fragment_length is not None:
        applet_input["fragment_length"] = fragment_length
    if bigbed and as_file:
        applet_input["as_file"] = as_file
    if prefix:
        applet_input["prefix"] = prefix

    return applet.run(applet_input, name=name)
Esempio n. 12
0
def macs2(experiment, control, xcor_scores, chrom_sizes,
          narrowpeak_as, gappedpeak_as, broadpeak_as, genomesize,
          name="MACS2", prefix=None, fragment_length=None):
    """Run the 'macs2' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required).  ``prefix`` is passed when truthy and
    ``fragment_length`` when not None; ``name`` is forwarded to ``run``.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(classname='applet',
                                       name='macs2',
                                       project=dxpy.PROJECT_CONTEXT_ID,
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)

    applet_input = {"experiment": experiment,
                    "control": control,
                    "xcor_scores_input": xcor_scores,
                    "chrom_sizes": chrom_sizes,
                    "narrowpeak_as": narrowpeak_as,
                    "gappedpeak_as": gappedpeak_as,
                    "broadpeak_as": broadpeak_as,
                    "genomesize": genomesize}

    if prefix:
        applet_input['prefix'] = prefix
    if fragment_length is not None:
        applet_input['fragment_length'] = fragment_length

    return applet.run(applet_input, name=name)
def find_applet_by_name(applet_name):
    '''Return the handler of the applet called applet_name in the tools project.

    The search is restricted to ENCODE_CHIP_SEQ_PROJECT and requires exactly
    one match; the resolved ID is printed.  From Joe Dale's code.
    '''
    search_options = dict(classname="applet",
                          name=applet_name,
                          project=ENCODE_CHIP_SEQ_PROJECT,
                          zero_ok=False,
                          more_ok=False,
                          return_handler=True)
    applet = dxpy.find_one_data_object(**search_options)
    print("Resolved %s to %s" % (applet_name, applet.get_id()))
    return applet
Esempio n. 14
0
def spp(experiment, control, xcor_scores):
    """Run the 'spp' applet on a 'mem2_ssd1_x8' instance.

    The applet is looked up by name (exactly one match is required); no
    project is given to the search.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(classname='applet',
                                       name='spp',
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)
    applet_input = {
        "experiment": experiment,
        "control": control,
        "xcor_scores_input": xcor_scores,
    }
    return applet.run(applet_input, instance_type="mem2_ssd1_x8")
Esempio n. 15
0
def find_applet(applet_name):
    """Return the handler of the applet called ``applet_name``.

    The search is limited to the module-level ``project`` and
    ``applets_folder`` and requires exactly one match.
    """
    search_options = {
        'classname': 'applet',
        'name': applet_name,
        'project': project.get_id(),
        'folder': applets_folder,
        'zero_ok': False,
        'more_ok': False,
        'return_handler': True,
    }
    applet = dxpy.find_one_data_object(**search_options)
    return applet
Esempio n. 16
0
def run_gene_ebseq(treatment_folder, control_folder, patient_id):
    """
    Launches the 'rsem-ebseq gene expression' workflow for one
    treatment/control pair.

    The single "*.genes.results" file in each folder of jo_wu_project is
    linked as workflow input; output goes to an "ebseq" subfolder of the
    treatment folder.  The last path component of each folder is used as
    that sample's barcode in the job name and properties.
    """
    def _genes_results_link(folder):
        # Exactly-one-or-error lookup (more_ok=False) of the quant file.
        hit = dxpy.find_one_data_object(more_ok=False,
                                        project=jo_wu_project,
                                        folder=folder,
                                        name="*.genes.results",
                                        name_mode="glob")
        return dxpy.dxlink(project_id=hit['project'], object_id=hit['id'])

    treatment_link = _genes_results_link(treatment_folder)
    control_link = _genes_results_link(control_folder)

    treatment_barcode = os.path.split(treatment_folder)[-1]
    control_barcode = os.path.split(control_folder)[-1]

    workflow = dxpy.DXWorkflow(project="project-BxxYqbQ0v3VQz5z2bvFyF6YV",
                               dxid="workflow-ByFG8g00v3VpjkVxP0gkpX7X")
    output_folder = os.path.join(treatment_folder, "ebseq")

    inputs = {"0.results": [treatment_link, control_link],
              "1.conditions": "1,1"}
    properties = {"patient_id": patient_id,
                  "control_barcode": control_barcode,
                  "treatment_barcode": treatment_barcode}
    run_name = "_".join(
        [patient_id, treatment_barcode, control_barcode, workflow.name])

    workflow.run(debug={"debugOn": ["AppError", "AppInternalError"]},
                 workflow_input=inputs,
                 project=jo_wu_project,
                 folder=output_folder,
                 name=run_name,
                 properties=properties)
 def get_reference_ids(self):
     """Look up the reference genome FASTA and BWA index file IDs.

     Both files are searched by exact name in the folder
     '/<self.reference_genome>' of the reference genome project, requiring
     exactly one match each.  The IDs are stored on
     ``self.reference_genome_dxid`` and ``self.reference_index_dxid``.
     """
     reference_genome_project = 'project-F3x6Zf89QqxF6vjK0qfkJG1y'

     def _lookup_id(file_name):
         # Same search for both files; only the file name differs.
         hit = dxpy.find_one_data_object(
             classname='file',
             name=file_name,
             name_mode='exact',
             project=reference_genome_project,
             folder='/%s' % self.reference_genome,
             zero_ok=False,
             more_ok=False)
         return hit['id']

     self.reference_genome_dxid = _lookup_id('genome.fa.gz')
     self.reference_index_dxid = _lookup_id('bwa_index.tar.gz')
Esempio n. 18
0
def xcor_only(tags, paired_end):
    """Run the 'xcor_only' applet on a tagAlign input.

    The applet is looked up by name (exactly one match is required); no
    project is given to the search.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(
        classname='applet',
        name='xcor_only',
        zero_ok=False,
        more_ok=False,
        return_handler=True)
    applet_input = {"input_tagAlign": tags, "paired_end": paired_end}
    return applet.run(applet_input)
Esempio n. 19
0
def find_applet_by_name(applet_name, applets_project_id=APPLETS_PROJECT_ID):
    """Return the applet handler for ``applet_name``, caching it in APPLETS.

    Results are stored in the module-level APPLETS mapping under the key
    (applet_name, applets_project_id), so each pair is searched only once.
    The search requires exactly one match.
    """
    cache_key = (applet_name, applets_project_id)
    if cache_key not in APPLETS:
        APPLETS[cache_key] = dxpy.find_one_data_object(
            classname="applet",
            name=applet_name,
            project=applets_project_id,
            zero_ok=False,
            more_ok=False,
            return_handler=True)
    return APPLETS[cache_key]
Esempio n. 20
0
def run_isoform_ebseq(treatment_folder, control_folder, patient_id):
    """
    Launches the 'rsem-ebseq isoform expression' workflow for one
    treatment/control pair.

    The single "*.isoforms.results" file in each folder of jo_wu_project is
    linked as workflow input, together with a fixed transcripts file;
    output goes to an "ebseq" subfolder of the treatment folder.  The last
    path component of each folder is used as that sample's barcode in the
    job name and properties.
    """
    def _isoforms_results_link(folder):
        # Exactly-one-or-error lookup (more_ok=False) of the quant file.
        hit = dxpy.find_one_data_object(more_ok=False,
                                        project=jo_wu_project,
                                        folder=folder,
                                        name="*.isoforms.results",
                                        name_mode="glob")
        return dxpy.dxlink(project_id=hit['project'], object_id=hit['id'])

    treatment_link = _isoforms_results_link(treatment_folder)
    control_link = _isoforms_results_link(control_folder)

    treatment_barcode = os.path.split(treatment_folder)[-1]
    control_barcode = os.path.split(control_folder)[-1]

    workflow = dxpy.DXWorkflow(project="project-BxxYqbQ0v3VQz5z2bvFyF6YV",
                               dxid="workflow-ByFPY900v3Vbb8XJVxvzJ1f7")
    output_folder = os.path.join(treatment_folder, "ebseq")
    transcripts_link = dxpy.dxlink(
        project_id="project-BxZpbXj0V610b5Q6x1FV80gb",
        object_id="file-ByBpjPQ0V61585gqXkvQ11Z5")

    inputs = {"0.results": [treatment_link, control_link],
              "1.input_fasta_file": transcripts_link,
              "1.output_name": "isoforms.results",
              "2.conditions": "1,1"}
    properties = {"patient_id": patient_id,
                  "control_barcode": control_barcode,
                  "treatment_barcode": treatment_barcode}
    run_name = "_".join(
        [patient_id, treatment_barcode, control_barcode, workflow.name])

    workflow.run(debug={"debugOn": ["AppError", "AppInternalError"]},
                 workflow_input=inputs,
                 project=jo_wu_project,
                 folder=output_folder,
                 name=run_name,
                 properties=properties)
def find_reference_file_by_name(reference_name):
    '''Return a DNAnexus link to the reference file called reference_name.

    The file is searched recursively under '/Reference Data' in
    ENCODE_CHIP_SEQ_PROJECT and exactly one match is required; the resolved
    ID is printed.  From Joe Dale's code.
    '''
    search_options = dict(classname="file",
                          name=reference_name,
                          project=ENCODE_CHIP_SEQ_PROJECT,
                          folder='/Reference Data',
                          recurse=True,
                          zero_ok=False,
                          more_ok=False,
                          return_handler=True)
    reference_file = dxpy.find_one_data_object(**search_options)
    print("Resolved %s to %s" % (reference_name, reference_file.get_id()))
    return dxpy.dxlink(reference_file)
Esempio n. 22
0
def pooled(files):
    """Run the 'pool' applet on ``files`` and return its 'pooled' output ref.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required).

    Args:
        files: iterable of file handlers; each must provide ``get_id()``
            and be accepted by ``dxpy.dxlink``.

    Returns:
        The value of ``get_output_ref("pooled")`` on the launched subjob.
    """
    pool_applet = dxpy.find_one_data_object(
        classname='applet', name='pool', project=dxpy.PROJECT_CONTEXT_ID,
        zero_ok=False, more_ok=False, return_handler=True)

    # Values are passed as logging arguments rather than pre-formatted with
    # "%": 'fmt' % (files) raises TypeError when ``files`` is a tuple.
    logger.debug('input files:%s', files)
    logger.debug('input file ids:%s', [dxf.get_id() for dxf in files])

    # Build the link list once and reuse it for the log line and the run.
    file_links = [dxpy.dxlink(dxf) for dxf in files]
    logger.debug('input files dxlinks:%s', file_links)

    pool_subjob = pool_applet.run({"inputs": file_links})
    return pool_subjob.get_output_ref("pooled")
Esempio n. 23
0
    def test_unpaired(self):
        """Launch ``self.program`` on unpaired RNA-Seq mappings.

        Looks up the BED file and the mappings by name.  If either cannot be
        found, prints which one is missing and returns False without
        launching anything.  Otherwise runs the program and prints the ID of
        the launched job.

        Returns:
            False when an input is missing; None after a successful launch.
        """
        # With the default zero_ok=False a missing object raises
        # DXSearchError, which made the None checks below unreachable.
        # zero_ok=True returns None instead, so they now work as intended.
        bed_file = dxpy.find_one_data_object(
            name="hg19_GRCh37_Feb2009_RefSeq.bed", zero_ok=True)
        mappings = dxpy.find_one_data_object(
            name="unpaired_RNA-Seq_mappings",
            typename="LetterMappings",
            zero_ok=True)

        if bed_file is None:
            print("Cannot find hg19_GRCh37_Feb2009_RefSeq.bed.  Please upload it")
            return False
        if mappings is None:
            print("Cannot find unpaired_RNA-Seq_mappings.  Please upload it")
            return False

        # Named program_input so the builtin input() is not shadowed.
        program_input = {
            'rna_seq_mappings': dxpy.dxlink(mappings['id']),
            'bed_file': dxpy.dxlink(bed_file['id'])
        }

        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Running program with %s" % (program_input,))
        job = self.program.run(program_input)
        print("launched test_unpaired  %s" % (job.get_id(),))
Esempio n. 24
0
def xcor_only(tags, paired_end):
    """Run the 'xcor_only' applet on a tagAlign input.

    The applet is looked up by name (exactly one match is required); no
    project is given to the search.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    search_options = dict(classname='applet',
                          name='xcor_only',
                          zero_ok=False,
                          more_ok=False,
                          return_handler=True)
    applet = dxpy.find_one_data_object(**search_options)
    return applet.run({"input_tagAlign": tags, "paired_end": paired_end})
Esempio n. 25
0
 def find_interop_file(self):
     '''DEPRECATED.

     Return the file ID of '<run_name>.InterOp.tar.gz', searched by exact
     name from the root folder of self.project_id.
     '''
     search_options = dict(classname='file',
                           name='%s.InterOp.tar.gz' % self.run_name,
                           name_mode='exact',
                           project=self.project_id,
                           folder='/',
                           zero_ok=False,
                           more_ok=True)
     return dxpy.find_one_data_object(**search_options)['id']
def find_applet_by_name(applet_name, applets_project_id=APPLETS_PROJECT_ID):
    """Return the applet handler for ``applet_name``, caching it in APPLETS.

    The module-level APPLETS mapping is keyed by
    (applet_name, applets_project_id); the DNAnexus search (exactly one
    match required) only happens on a cache miss.
    """
    lookup_key = (applet_name, applets_project_id)
    if lookup_key in APPLETS:
        return APPLETS[lookup_key]

    applet = dxpy.find_one_data_object(classname="applet",
                                       name=applet_name,
                                       project=applets_project_id,
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)
    APPLETS[lookup_key] = applet
    return APPLETS[lookup_key]
Esempio n. 27
0
def spp(experiment, control, xcor_scores, chrom_sizes, bigbed=False, as_file=None):
    """Run the 'spp' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required).  ``as_file`` is only passed to the applet when
    both ``bigbed`` and ``as_file`` are truthy.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(classname='applet',
                                       name='spp',
                                       project=dxpy.PROJECT_CONTEXT_ID,
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)
    applet_input = {
        "experiment": experiment,
        "control": control,
        "xcor_scores_input": xcor_scores,
        "bigbed": bigbed,
        "chrom_sizes": chrom_sizes,
    }
    if bigbed and as_file:
        applet_input["as_file"] = as_file
    return applet.run(applet_input)
Esempio n. 28
0
def find_applet_by_name(applet_name, applets_project_id):
    '''Return the applet handler for applet_name, caching it in APPLETS.

    The module-level APPLETS mapping is keyed by
    (applet_name, applets_project_id).  The log line is prefixed with '*'
    when the handler came from the cache.  From Joe Dale's code.
    '''
    cache_key = (applet_name, applets_project_id)
    was_cached = cache_key in APPLETS
    if not was_cached:
        APPLETS[cache_key] = dxpy.find_one_data_object(
            classname="applet",
            name=applet_name,
            project=applets_project_id,
            zero_ok=False,
            more_ok=False,
            return_handler=True)

    marker = '*' if was_cached else ''
    logging.info(marker + "Resolved applet %s to %s" % (applet_name, APPLETS[cache_key].get_id()))
    return APPLETS[cache_key]
Esempio n. 29
0
def find_applet_by_name(applet_name, applets_project_id):
    '''Look up an applet by name in a project, memoized in APPLETS.

    A cache hit on the key (applet_name, applets_project_id) is marked in
    the log line by a leading '*'; a miss triggers a search that requires
    exactly one match.  From Joe Dale's code.
    '''
    key = (applet_name, applets_project_id)
    if key in APPLETS:
        prefix = '*'
    else:
        applet = dxpy.find_one_data_object(classname="applet",
                                           name=applet_name,
                                           project=applets_project_id,
                                           zero_ok=False,
                                           more_ok=False,
                                           return_handler=True)
        APPLETS[key] = applet
        prefix = ''

    logging.info(prefix + "Resolved applet %s to %s" % (applet_name, APPLETS[key].get_id()))
    return APPLETS[key]
Esempio n. 30
0
def xcor_only(tags, paired_end, spp_version=None, name='xcor_only'):
    """Run the 'xcor_only' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required).  ``spp_version`` is only passed to the applet
    when it is truthy; ``name`` is forwarded to the ``run`` call.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(classname='applet',
                                       name='xcor_only',
                                       project=dxpy.PROJECT_CONTEXT_ID,
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)
    inputs = {
        "input_tagAlign": tags,
        "paired_end": paired_end,
    }
    if spp_version:
        inputs['spp_version'] = spp_version
    return applet.run(inputs, name=name)
def macs2(experiment, control, xcor_scores, chrom_sizes, narrowpeak_as, gappedpeak_as, broadpeak_as, genomesize):
    """Run the 'macs2' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required) and run with all eight arguments as its input.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    # The original call read "project=zero_ok=False", which is a syntax
    # error (and a duplicated ``project`` keyword); it is now a plain
    # ``zero_ok=False``.
    macs2_applet = dxpy.find_one_data_object(
        classname='applet', name='macs2', project=dxpy.PROJECT_CONTEXT_ID,
        zero_ok=False, more_ok=False, return_handler=True)
    macs2_input = {
        "experiment": experiment,
        "control": control,
        "xcor_scores_input": xcor_scores,
        "chrom_sizes": chrom_sizes,
        "narrowpeak_as": narrowpeak_as,
        "gappedpeak_as": gappedpeak_as,
        "broadpeak_as": broadpeak_as,
        "genomesize": genomesize,
    }
    return macs2_applet.run(macs2_input)
Esempio n. 32
0
def xcor_only(tags, paired_end, name='xcor_only'):
    """Run the 'xcor_only' applet from the current project context.

    The applet is looked up by name in ``dxpy.PROJECT_CONTEXT_ID`` (exactly
    one match is required); ``name`` is forwarded to the ``run`` call.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(classname='applet',
                                       name='xcor_only',
                                       project=dxpy.PROJECT_CONTEXT_ID,
                                       zero_ok=False,
                                       more_ok=False,
                                       return_handler=True)
    applet_input = {"input_tagAlign": tags, "paired_end": paired_end}
    return applet.run(applet_input, name=name)
Esempio n. 33
0
def resolve_file(identifier, server, keypair):
    """Resolve a file identifier to a DNAnexus file handler.

    The identifier may be ``project:path`` or just a path/name; without a
    project part, DATA_CACHE_PROJECT is used.  Without a folder part the
    file is looked up as ``<identifier>.fastq.gz`` in '/fastqs/'.  If the
    name search fails, the whole identifier is tried as a file ID and
    finally passed to ``resolve_accession`` with ``server`` and ``keypair``.

    Args:
        identifier: the identifier string; must be truthy.
        server: forwarded unchanged to ``resolve_accession``.
        keypair: forwarded unchanged to ``resolve_accession``.

    Returns:
        The resolved file handler.

    Raises:
        AssertionError: if ``identifier`` is falsy or nothing was resolved
            (kept as ``assert`` so callers see the same exception type;
            note these checks vanish under ``python -O``).
    """
    logger.debug("resolve_file: %s", identifier)

    assert identifier, "No file identifier passed to resolve_file"

    m = re.match(r'''^([\w\-\ \.]+):([\w\-\ /\.]+)''', identifier)
    if m:  # fully specified with project:path
        project_identifier = m.group(1)
        file_identifier = m.group(2)
    else:
        logger.debug("Defaulting to the current project")
        project_identifier = DATA_CACHE_PROJECT
        file_identifier = identifier

    project = resolve_project(project_identifier)
    logger.debug("Got project %s", project.name)
    logger.debug("Now looking for file %s", file_identifier)

    m = re.match(r'''(^[\w\-\ /\.]+)/([\w\-\ \.]+)''', file_identifier)
    if m:
        folder_name = m.group(1)
        if not folder_name.startswith('/'):
            folder_name = '/' + folder_name
        file_name = m.group(2)
    else:
        folder_name = '/fastqs/'
        file_name = file_identifier + '.fastq.gz'

    logger.debug("Looking for file %s in folder %s", file_name, folder_name)

    # "except Exception" rather than a bare except, so KeyboardInterrupt
    # and SystemExit are not swallowed by the fallback chain.
    try:
        file_handler = dxpy.find_one_data_object(name=file_name,
                                                 folder=folder_name,
                                                 project=project.get_id(),
                                                 more_ok=False,
                                                 zero_ok=False,
                                                 return_handler=True)
    except Exception:
        logger.debug('%s not found in project %s folder %s',
                     file_name, project.get_id(), folder_name)
        try:  # maybe the identifier is a DNAnexus file ID
            file_handler = dxpy.DXFile(dxid=identifier, mode='r')
        except Exception:
            logger.debug('%s not found as a dxid', identifier)
            # Last resort; errors from here propagate to the caller.
            file_handler = resolve_accession(identifier, server, keypair)

    assert file_handler, "Failed to resolve file identifier %s" % (identifier)
    logger.debug("Resolved file identifier %s to %s",
                 identifier, file_handler.name)

    return file_handler
Esempio n. 34
0
def xcor_only(tags, paired_end, name='xcor_only'):
    """Run the ``xcor_only`` applet from the current project context.

    Args:
        tags: Value passed to the applet as ``input_tagAlign``.
        paired_end: Value passed to the applet as ``paired_end``.
        name: Name given to the launched job.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    # The lookup is strict: zero_ok=False and more_ok=False mean that
    # anything other than exactly one matching applet raises.
    applet = dxpy.find_one_data_object(
        classname='applet', name='xcor_only',
        project=dxpy.PROJECT_CONTEXT_ID,
        zero_ok=False, more_ok=False,
        return_handler=True)
    applet_input = {"input_tagAlign": tags, "paired_end": paired_end}
    return applet.run(applet_input, name=name)
Esempio n. 35
0
def patch(obs):
    """Rewrite ``submitted_file_name`` of file objects to a full folder path.

    Every object that is neither a fastq nor revoked is looked up by name in
    a fixed DNAnexus project; the folder it lives in is prepended to the
    name and the result is sent as a PATCH to ``srv + fob['@id']``.  The
    outcome of each request is printed.

    Args:
        obs: Iterable of dicts with the keys ``file_format``, ``status``,
            ``submitted_file_name`` and ``@id``.
    """
    # NOTE(review): ``srv``, ``id`` and ``pw`` are not defined in this block;
    # presumably they are module-level settings -- confirm (``id`` would
    # otherwise resolve to the builtin function).
    for fob in obs:
        if fob['file_format'] == 'fastq' or fob['status'] == 'revoked':
            continue
        fn = fob['submitted_file_name']
        bare_name = fn.strip('/')
        found = dxpy.find_one_data_object(
            name=bare_name,
            project='project-BQkYKg00F1GP55qQ9Qy00VP0')
        folder = dxpy.describe(found['id'])['folder']
        newfn = folder + '/' + bare_name
        # print(...) with a single argument produces the same output on
        # Python 2 and Python 3.
        print("Patch: %s with %s" % (fn, newfn))
        res = requests.patch(
            srv + fob['@id'],
            auth=(id, pw),
            data=json.dumps({'submitted_file_name': newfn}),
            headers={'content-type': 'application/json'})
        try:
            res.raise_for_status()
            print("Success")
        except Exception as e:
            # Best effort: report the failure and carry on with the next
            # object.
            print("Failed %s" % e)
Esempio n. 36
0
def find_record(run_name, project):
    """Find the upload sentinel record for ``run_name`` in a DNAnexus project.

    The record is searched with the glob ``*<run_name>*`` in the folder
    ``<RUN_UPLOAD_DEST>/<run_name>/<REMOTE_RUN_FOLDER>``.

    Args:
        run_name: Name of the run; used both in the glob and in the folder.
        project: Project passed to the search.

    Returns:
        The handler of the single matching record.

    Exits the program through ``sys.exit`` with an error message when the
    search raises ``DXSearchError`` (zero or multiple records found), since
    the sentinel cannot then be resolved uniquely.
    """
    folder = "{0}/{1}/{2}".format(
        RUN_UPLOAD_DEST.rstrip('/'), run_name, REMOTE_RUN_FOLDER)
    try:
        return dxpy.find_one_data_object(
            classname="record",
            name="*{0}*".format(run_name),
            name_mode="glob",
            project=project,
            folder=folder,
            return_handler=True,
            more_ok=False,
            zero_ok=False)
    except dxpy.exceptions.DXSearchError as e:
        sys.exit("Unexpected result when searching for upload sentinel of run {0}. {1}".format(run_name, e))
Esempio n. 37
0
def main(reads1, bwa_aln_params, bwa_version, samtools_version, reads2, reference_tar, key, debug):
    """Resolve the input files and return them with the mapping parameters.

    Args:
        reads1: List of identifiers, each resolved with ``resolve_file``.
            When more than one file results, they are pooled by running the
            ``pool`` applet from the current project context.
        bwa_aln_params: Copied unchanged into ``output_JSON``.
        bwa_version: Copied unchanged into ``output_JSON``.
        samtools_version: Copied unchanged into ``output_JSON``.
        reads2: Optional identifier of the second-read file; only resolved
            and reported when truthy.
        reference_tar: Identifier resolved with ``resolve_file``.
        key: Passed as second argument to every ``resolve_file`` call.
        debug: Truthy selects DEBUG logging, otherwise INFO.

    Returns:
        dict with ``reads1``, ``reads2`` (only when given) and
        ``output_JSON``, the latter holding the resolved files plus the
        pass-through parameters.
    """
    logger.setLevel(logging.DEBUG if debug else logging.INFO)

    reads1_files = [resolve_file(read, key) for read in reads1]
    if len(reads1_files) > 1:
        pool_applet = dxpy.find_one_data_object(
            classname='applet', name='pool', project=dxpy.PROJECT_CONTEXT_ID,
            zero_ok=False, more_ok=False, return_handler=True)
        reads1_links = [dxpy.dxlink(dxf) for dxf in reads1_files]
        logger.debug('reads1_files:%s', reads1_files)
        logger.debug('reads1_files ids:%s',
                     [dxf.get_id() for dxf in reads1_files])
        logger.debug('reads1_files dxlinks:%s', reads1_links)
        pool_subjob = pool_applet.run({"inputs": reads1_links})
        reads1_file = pool_subjob.get_output_ref("pooled")
    else:
        reads1_file = reads1_files[0]

    # reads2 is optional: every later use is guarded by ``if reads2``, so it
    # must not be resolved when it was not supplied.
    reads2_file = resolve_file(reads2, key) if reads2 else None
    reference_tar_file = resolve_file(reference_tar, key)

    logger.info('Resolved reads1 to %s', reads1_file)
    if reads2:
        logger.info('Resolved reads2 to %s', reads2_file)
    logger.info('Resolved reference_tar to %s', reference_tar_file)

    output_json = {
        "reads1": reads1_file,
        "reference_tar": reference_tar_file,
        "bwa_aln_params": bwa_aln_params,
        "bwa_version": bwa_version,
        "samtools_version": samtools_version
    }
    output = {'reads1': reads1_file}
    if reads2:
        output['reads2'] = reads2_file
        output_json['reads2'] = reads2_file
    output['output_JSON'] = output_json

    logger.info('Exiting with output: %s' % (output))
    return output
Esempio n. 38
0
def copy_files(fids, project_id, folder):
    """Make sure each file is present in the destination and return links.

    Args:
        fids: Iterable of dicts with ``id`` and ``project`` keys naming the
            source files.
        project_id: Destination project.
        folder: Destination folder.

    Returns:
        list with one dxlink per input, in input order: a link to the
        same-named file already in the destination folder when there is
        one, otherwise a link to a fresh clone of the source file.
    """
    links = []
    for source in fids:
        src_file = dxpy.DXFile(dxid=source['id'], project=source['project'])
        file_name = src_file.describe()['name']

        # zero_ok=True: a missing file yields None instead of an exception.
        existing = dxpy.find_one_data_object(
            classname='file', name=file_name,
            project=project_id, folder=folder,
            zero_ok=True)
        if existing is None:
            target = src_file.clone(project_id, folder)
        else:
            target = existing
        links.append(dxpy.dxlink(target))

    return links
def find_record(run_name, project):
    """Find the upload sentinel record for ``run_name`` in a DNAnexus project.

    The record is searched with the glob ``*<run_name>*`` in the folder
    ``<RUN_UPLOAD_DEST>/<run_name>/<REMOTE_RUN_FOLDER>``.

    Args:
        run_name: Name of the run; used both in the glob and in the folder.
        project: Project passed to the search.

    Returns:
        The handler of the single matching record.

    Exits the program through ``sys.exit`` with an error message when the
    search raises ``DXSearchError`` (zero or multiple records found), since
    the sentinel cannot then be resolved uniquely.
    """
    folder = "{0}/{1}/{2}".format(
        RUN_UPLOAD_DEST.rstrip('/'), run_name, REMOTE_RUN_FOLDER)
    try:
        return dxpy.find_one_data_object(
            classname="record",
            name="*{0}*".format(run_name),
            name_mode="glob",
            project=project,
            folder=folder,
            return_handler=True,
            more_ok=False,
            zero_ok=False)
    except dxpy.exceptions.DXSearchError as e:
        sys.exit("Unexpected result when searching for upload sentinel of run {0}. {1}".format(run_name, e))
Esempio n. 40
0
def find_reference_file_by_name(reference_name, project_name):
    '''Look up a reference file by name in the named project, with caching.

    From Joe Dale's code.

    The project is found by exact name.  Handlers are cached in
    ``REFERENCE_FILES`` under ``(reference_name, project id)``, so the file
    search runs only once per key.  The printed message is prefixed with
    ``*`` when the handler came from the cache.

    Args:
        reference_name: Exact name of the file, searched recursively.
        project_name: Exact name of the project to search.

    Returns:
        A dxlink to the file.
    '''
    project = dxpy.find_one_project(
        name=project_name, name_mode='exact', return_handler=False)
    cache_key = (reference_name, project['id'])
    cached = '*'
    if cache_key not in REFERENCE_FILES:
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(
            classname="file", name=reference_name,
            project=project['id'],
            recurse=True,
            zero_ok=False, more_ok=False, return_handler=True)
        cached = ''

    handler = REFERENCE_FILES[cache_key]
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(cached + "Resolved %s to %s" % (reference_name, handler.get_id()))
    return dxpy.dxlink(handler)
Esempio n. 41
0
def resolve_accession(accession, key):
    """Resolve an ``ENCFF`` accession to a DNAnexus file.

    The accession is first searched recursively (as ``<accession>*``) in the
    project named by ``DATA_CACHE_PROJECT``, when that is set and can be
    resolved.  If that does not produce a file, ``s3cp`` is asked for it.

    Args:
        accession: String that must start with ``ENCFF``, three digits and
            three upper-case letters.
        key: Passed through to ``s3cp``.

    Returns:
        The file handler found in the cache project, or the value returned
        by ``s3cp``; ``None`` when the accession is malformed or nothing was
        found.
    """
    logger.debug("Looking for accession %s" % (accession))

    if not re.match(r'''^ENCFF\d{3}[A-Z]{3}''', accession):
        logger.warning("%s is not a valid accession format" % (accession))
        return None

    if DATA_CACHE_PROJECT:
        logger.debug('Looking for cache project %s' % (DATA_CACHE_PROJECT))
        # ``except Exception`` (not a bare ``except``) keeps the best-effort
        # fallback but lets KeyboardInterrupt/SystemExit propagate.
        try:
            snapshot_project = resolve_project(DATA_CACHE_PROJECT)
        except Exception:
            logger.error("Cannot find cache project %s" % (DATA_CACHE_PROJECT))
            snapshot_project = None

        logger.debug('Cache project: %s' % (snapshot_project))

        if snapshot_project:
            try:
                accession_search = accession + '*'
                logger.debug('Looking recursively for %s in %s' %
                             (accession_search, snapshot_project.name))
                file_handler = dxpy.find_one_data_object(
                    name=accession_search,
                    name_mode='glob',
                    more_ok=False,
                    classname='file',
                    recurse=True,
                    return_handler=True,
                    folder='/',
                    project=snapshot_project.get_id())
                logger.debug('Got file handler for %s' % (file_handler.name))
                return file_handler
            except Exception:
                logger.debug("Cannot find accession %s in project %s" %
                             (accession, snapshot_project))

    # Reaching this point means there is no usable cache project or the file
    # is not in it, so fall back to s3cp.
    dx_file = s3cp(accession, key)

    if not dx_file:
        logger.warning('Cannot find %s.  Giving up.' % (accession))
        return None
    return dx_file
Esempio n. 42
0
def resolve_file(identifier, server, keypair):
    """Resolve a file identifier to a DNAnexus file handler.

    The identifier is tried, in order, as:

    1. ``[project:][folder/]name``, looked up with
       ``dxpy.find_one_data_object``.  Without a ``project:`` prefix the
       project is ``DATA_CACHE_PROJECT``; without a folder part the file is
       searched as ``/fastqs/<identifier>.fastq.gz``.
    2. a DNAnexus object ID, through ``dxpy.DXFile``.
    3. an accession, through ``resolve_accession``.

    Args:
        identifier: Non-empty string naming the file.
        server: Passed through to ``resolve_accession``.
        keypair: Passed through to ``resolve_accession``.

    Returns:
        The resolved file handler.

    Raises:
        AssertionError: If ``identifier`` is empty or none of the three
            strategies produced a handler.
    """
    logger.debug("resolve_file: %s" % (identifier))

    assert identifier, "No file identifier passed to resolve_file"

    m = re.match(r'''^([\w\-\ \.]+):([\w\-\ /\.]+)''', identifier)
    if m:  # fully specified with project:path
        project_identifier = m.group(1)
        file_identifier = m.group(2)
    else:
        # NOTE(review): the message says "current project" but the code
        # falls back to DATA_CACHE_PROJECT -- confirm which is intended.
        logger.debug("Defaulting to the current project")
        project_identifier = DATA_CACHE_PROJECT
        file_identifier = identifier

    project = resolve_project(project_identifier)
    logger.debug("Got project %s" % (project.name))
    logger.debug("Now looking for file %s" % (file_identifier))

    m = re.match(r'''(^[\w\-\ /\.]+)/([\w\-\ \.]+)''', file_identifier)
    if m:
        folder_name = m.group(1)
        if not folder_name.startswith('/'):
            folder_name = '/' + folder_name
        file_name = m.group(2)
    else:
        folder_name = '/fastqs/'
        file_name = file_identifier + '.fastq.gz'

    logger.debug("Looking for file %s in folder %s" % (file_name, folder_name))

    # ``except Exception`` (not a bare ``except``) keeps the fallback chain
    # but lets KeyboardInterrupt/SystemExit propagate.
    try:
        file_handler = dxpy.find_one_data_object(
            name=file_name, folder=folder_name, project=project.get_id(),
            more_ok=False, zero_ok=False, return_handler=True)
    except Exception:
        logger.debug(
            '%s not found in project %s folder %s'
            % (file_name, project.get_id(), folder_name))
        try:  # maybe the identifier is itself a DNAnexus object ID
            file_handler = dxpy.DXFile(dxid=identifier, mode='r')
        except Exception:
            logger.debug('%s not found as a dxid' % (identifier))
            file_handler = resolve_accession(identifier, server, keypair)

    assert file_handler, "Failed to resolve file identifier %s" % (identifier)
    logger.debug(
        "Resolved file identifier %s to %s" % (identifier, file_handler.name))

    return file_handler
 def get_barcode_stats(self, barcode):
     """
     Read and decode the ${barcode}_stats.json file of the DNAnexus project
     self.dx_project_id (the file is usually in the qc folder).

     Args:
         barcode: String prepended to "_stats.json" to form the file name.

     Returns:
         The object decoded from the JSON content of the file.
     """
     # more_ok is deliberately left unset.  Passing more_ok=False would be the
     # natural choice, but it raised a RuntimeError under Python 3.7 (dxpy
     # being mainly a Python 2.7 library at the time).
     match = dxpy.find_one_data_object(name=barcode + "_stats.json",
                                       project=self.dx_project_id,
                                       zero_ok=False)
     stats_file = dxpy.open_dxfile(match["id"])
     return json.loads(stats_file.read())
Esempio n. 44
0
def pooled(files):
    """Pool several files by running the ``pool`` applet.

    Args:
        files: DNAnexus handlers (objects offering ``get_id`` and accepted
            by ``dxpy.dxlink``).

    Returns:
        The job-based reference to the ``pooled`` output of the launched
        job.
    """
    applet = dxpy.find_one_data_object(
        classname='applet', name='pool',
        project=dxpy.PROJECT_CONTEXT_ID,
        zero_ok=False, more_ok=False,
        return_handler=True)

    logger.debug('input files:%s' % (files))
    file_ids = [dxf.get_id() for dxf in files]
    logger.debug('input file ids:%s' % (file_ids))
    links = [dxpy.dxlink(dxf) for dxf in files]
    logger.debug('input files dxlinks:%s' % (links))

    subjob = applet.run({"inputs": links})
    return subjob.get_output_ref("pooled")
Esempio n. 45
0
def test_mapping():
    """Launch the ``bwa_mem_fastq_read_mapper`` applet on fixed inputs.

    The workspace, the applet's project and the three input files are all
    hard-coded DNAnexus IDs.
    """
    dxpy.set_workspace_id('project-BpBjyqQ0Jk0Xv2B11Q8P6X59')
    mapper = dxpy.find_one_data_object(
        classname='applet',
        name='bwa_mem_fastq_read_mapper',
        project='project-B406G0x2fz2B3GVk65200003',
        zero_ok=False,
        return_handler=True)

    # (applet input field, file ID) pairs, linked in this order.
    input_file_ids = (
        ('genomeindex_targz', 'file-B6qq53v2J35Qyg04XxG0000V'),
        ('reads_fastqgz', 'file-BpBjzFQ0Jk0Xk73YqQgJKg9Z'),
        ('reads2_fastqgz', 'file-BpBk0400Jk0Xk73YqQgJKg9f'),
    )
    mapper.run({field: dxpy.dxlink(file_id)
                for field, file_id in input_file_ids})
Esempio n. 46
0
def xcor_only(tags, paired_end, spp_version=None, name='xcor_only'):
    """Run the ``xcor_only`` applet from the current project context.

    Args:
        tags: Value passed to the applet as ``input_tagAlign``.
        paired_end: Value passed to the applet as ``paired_end``.
        spp_version: When truthy, passed to the applet as ``spp_version``;
            otherwise the field is left out of the input.
        name: Name given to the launched job.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(
        classname='applet', name='xcor_only',
        project=dxpy.PROJECT_CONTEXT_ID,
        zero_ok=False, more_ok=False,
        return_handler=True)

    applet_input = {"input_tagAlign": tags, "paired_end": paired_end}
    if spp_version:
        applet_input['spp_version'] = spp_version
    return applet.run(applet_input, name=name)
Esempio n. 47
0
def resolve_accession(accession, server, keypair):
    """Resolve an ``ENCFF`` accession to a DNAnexus file.

    The accession is first searched recursively (as ``<accession>*``) in the
    project named by ``DATA_CACHE_PROJECT``, when that is set and can be
    resolved.  If that does not produce a file, ``s3_dxcp`` is asked for it.

    Args:
        accession: String that must start with ``ENCFF``, three digits and
            three upper-case letters.
        server: Passed through to ``s3_dxcp``.
        keypair: Passed through to ``s3_dxcp``.

    Returns:
        The file handler found in the cache project, or the value returned
        by ``s3_dxcp``; ``None`` when the accession is malformed or nothing
        was found.
    """
    logger.debug("Looking for accession %s" % (accession))

    if not re.match(r'''^ENCFF\d{3}[A-Z]{3}''', accession):
        logger.warning("%s is not a valid accession format" % (accession))
        return None

    if DATA_CACHE_PROJECT:
        logger.debug('Looking for cache project %s' % (DATA_CACHE_PROJECT))
        # ``except Exception`` (not a bare ``except``) keeps the best-effort
        # fallback but lets KeyboardInterrupt/SystemExit propagate.
        try:
            snapshot_project = resolve_project(DATA_CACHE_PROJECT)
        except Exception:
            logger.error("Cannot find cache project %s" % (DATA_CACHE_PROJECT))
            snapshot_project = None

        logger.debug('Cache project: %s' % (snapshot_project))

        if snapshot_project:
            try:
                accession_search = accession + '*'
                logger.debug(
                    'Looking recursively for %s in %s'
                    % (accession_search, snapshot_project.name))
                file_handler = dxpy.find_one_data_object(
                    name=accession_search, name_mode='glob', more_ok=False,
                    classname='file', recurse=True, return_handler=True,
                    folder='/', project=snapshot_project.get_id())
                logger.debug('Got file handler for %s' % (file_handler.name))
                return file_handler
            except Exception:
                logger.debug(
                    "Cannot find accession %s in project %s"
                    % (accession, snapshot_project))

    # Reaching this point means there is no usable cache project or the file
    # is not in it, so fall back to s3_dxcp.
    dx_file = s3_dxcp(accession, server, keypair)

    if not dx_file:
        logger.warning('Cannot find %s.  Giving up.' % (accession))
        return None
    return dx_file
def find_reference_file_by_name(reference_name, applets_project_id):
    '''Look up a reference file by name under "/Reference Data", with caching.

    From Joe Dale's code.

    Handlers are cached in ``REFERENCE_FILES`` under
    ``(reference_name, applets_project_id)``, so the file search runs only
    once per key.  The printed message is prefixed with ``*`` when the
    handler came from the cache.

    Args:
        reference_name: Exact name of the file, searched recursively below
            the ``/Reference Data`` folder.
        applets_project_id: ID of the project to search.

    Returns:
        A dxlink to the file.
    '''
    cache_key = (reference_name, applets_project_id)
    cached = '*'
    if cache_key not in REFERENCE_FILES:
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(
            classname="file",
            name=reference_name,
            project=applets_project_id,
            folder='/Reference Data',
            recurse=True,
            zero_ok=False,
            more_ok=False,
            return_handler=True)
        cached = ''

    handler = REFERENCE_FILES[cache_key]
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(cached + "Resolved %s to %s" % (reference_name, handler.get_id()))
    return dxpy.dxlink(handler)
 def get_run_details_json(self):
     """
     Retrieves the JSON object for the stats in the file named run_details.json in the project 
     specified by self.dx_project_id.
 
     Returns: 
         JSON object of the run details.

     Raises:
         dxpy.exceptions.DXSearchError: If the project does not contain exactly one file
             named run_details.json.
     """
     run_details_filename = "run_details.json"
     # zero_ok must be False here: with zero_ok=True a missing file makes the
     # search return None, and subscripting that with ["id"] fails with an
     # uninformative TypeError instead of a search error.
     run_details_json_id = dxpy.find_one_data_object(
         more_ok=False,
         zero_ok=False,
         project=self.dx_project_id,
         name=run_details_filename)["id"]
     json_data = json.loads(
         dxpy.open_dxfile(dxid=run_details_json_id).read())
     return json_data
def copy_files(fids, project, folder):
    """Make sure each file is present in the destination and return links.

    Args:
        fids: Iterable of ``"<project id>:<file id>"`` strings naming the
            source files.
        project: Destination project handler (its ``get_id()`` is used).
        folder: Destination folder.

    Returns:
        list with one dxlink per input, in input order: a link to the
        same-named file already in the destination folder when there is
        one, otherwise a link to a fresh clone of the source file.
    """
    links = []
    for spec in fids:
        source_project, file_id = spec.split(':')
        src_file = dxpy.DXFile(dxid=file_id, project=source_project)
        file_name = src_file.describe()['name']

        # zero_ok=True: a missing file yields None instead of an exception.
        existing = dxpy.find_one_data_object(
            classname='file', name=file_name,
            project=project.get_id(), folder=folder,
            zero_ok=True)
        if existing is None:
            target = src_file.clone(project.get_id(), folder)
        else:
            target = existing
        links.append(dxpy.dxlink(target))

    return links
Esempio n. 51
0
def copy_files(fids, project_id, folder):
    """Make sure each file is present in the destination and return links.

    Args:
        fids: Iterable of dicts with ``id`` and ``project`` keys naming the
            source files.
        project_id: Destination project.
        folder: Destination folder.

    Returns:
        list with one dxlink per input, in input order: a link to the
        same-named file already in the destination folder when there is
        one, otherwise a link to a fresh clone of the source file.
    """
    links = []
    for source in fids:
        src_file = dxpy.DXFile(dxid=source['id'], project=source['project'])
        file_name = src_file.describe()['name']

        # zero_ok=True: a missing file yields None instead of an exception.
        existing = dxpy.find_one_data_object(
            classname='file', name=file_name,
            project=project_id, folder=folder,
            zero_ok=True)
        if existing is None:
            target = src_file.clone(project_id, folder)
        else:
            target = existing
        links.append(dxpy.dxlink(target))

    return links
def resolve_dx_file(identifier):
    """Resolve an object ID or a file name to a DNAnexus handler.

    The identifier is first given to ``dxpy.get_handler``.  If that raises
    ``dxpy.DXError``, it is used as a file name in a search that must match
    exactly one file.

    Returns:
        The handler, or ``None`` (after logging an error) when the name
        search raises ``dxpy.DXSearchError``.
    """
    try:
        return dxpy.get_handler(identifier)
    except dxpy.DXError:
        # Not usable as an ID: fall back to a lookup by file name.
        try:
            return dxpy.find_one_data_object(
                classname='file', name=identifier,
                zero_ok=False, more_ok=False,
                return_handler=True)
        except dxpy.DXSearchError:
            logging.error('Failed to resolve control %s to unique dx object.  ID or name does not exist or multiple files of that name were found.' % (str(identifier)))
            return None
def resolve_dx_file(identifier):
    """Resolve an object ID or a file name to a DNAnexus handler.

    The identifier is first given to ``dxpy.get_handler``.  If that raises
    ``dxpy.DXError``, it is used as a file name in a search that must match
    exactly one file.

    Returns:
        The handler, or ``None`` (after logging an error) when the name
        search raises ``dxpy.DXSearchError``.
    """
    try:
        return dxpy.get_handler(identifier)
    except dxpy.DXError:
        # Not usable as an ID: fall back to a lookup by file name.
        try:
            return dxpy.find_one_data_object(
                classname='file', name=identifier,
                zero_ok=False, more_ok=False,
                return_handler=True)
        except dxpy.DXSearchError:
            logging.error('Failed to resolve control %s to unique dx object.  ID or name does not exist or multiple files of that name were found.' % (str(identifier)))
            return None
    def get_sample_stats_json(self, barcode=None):
        r"""
        .. deprecated:: 0.1.0
           GSSC has removed the sample_stats.json file since the entire folder it was in has been
           removed. Use :meth:`get_barcode_stats` instead.

        Retrieves the JSON object for the stats in the file named sample_stats.json in the project
        specified by self.dx_project_id.  This file is located in the DNAnexus folder stage\d_qc_report.

        Args:
            barcode: `str`. The barcode for the sample. Currently, the sample_stats.json file is of the
                following form when there isn't a genome mapping:

                [{"Sample name": "AGTTCC"}, {"Sample name": "CAGATC"}, {"Sample name": "GCCAAT"}, ...].

                When there is a mapping, each dictionary has many more keys in addition to the "Sample name" one.

        Returns:
            `list` of dicts if barcode=None, otherwise a dict for the given barcode.

        Raises:
            DnanexusBarcodeNotFound: If a barcode was given and no entry of the file has it as
                its "Sample name".
        """
        sample_stats_json_filename = "sample_stats.json"
        sample_stats_json_id = dxpy.find_one_data_object(
            more_ok=False,
            zero_ok=False,
            project=self.dx_project_id,
            name=sample_stats_json_filename)["id"]
        json_data = json.loads(dxpy.open_dxfile(sample_stats_json_id).read())

        if not barcode:
            return json_data

        for sample_stats in json_data:  # one dict per sample
            if sample_stats["Sample name"] == barcode:
                return sample_stats

        # A barcode was requested (the falsy case returned above) and no
        # entry matched it.
        raise DnanexusBarcodeNotFound(
            "Barcode {barcode} for {library_name} not found in {sample_stats_json_filename} in project {project}."
            .format(barcode=barcode,
                    library_name=self.library_name,
                    sample_stats_json_filename=sample_stats_json_filename,
                    project=self.dx_project_id))
Esempio n. 55
0
def find_reference_file_by_name(reference_name, project_name):
    '''Look up a reference file by name in the named project, with caching.

    From Joe Dale's code.

    The project is found by exact name.  Handlers are cached in
    ``REFERENCE_FILES`` under ``(reference_name, project id)``, so the
    recursive file search runs only once per key.

    Returns:
        A dxlink to the file.
    '''
    project = dxpy.find_one_project(
        name=project_name, name_mode='exact', return_handler=False)
    cache_key = (reference_name, project['id'])

    if cache_key not in REFERENCE_FILES:
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(
            classname="file", name=reference_name,
            project=project['id'],
            recurse=True,
            zero_ok=False, more_ok=False,
            return_handler=True)

    return dxpy.dxlink(REFERENCE_FILES[cache_key])
Esempio n. 56
0
def patch(obs):
    """Rewrite ``submitted_file_name`` of file objects to a full folder path.

    Every object that is neither a fastq nor revoked is looked up by name in
    a fixed DNAnexus project; the folder it lives in is prepended to the
    name and the result is sent as a PATCH to ``srv + fob['@id']``.  The
    outcome of each request is printed.

    Args:
        obs: Iterable of dicts with the keys ``file_format``, ``status``,
            ``submitted_file_name`` and ``@id``.
    """
    # NOTE(review): ``srv``, ``id`` and ``pw`` are not defined in this block;
    # presumably they are module-level settings -- confirm (``id`` would
    # otherwise resolve to the builtin function).
    for fob in obs:
        if fob['file_format'] == 'fastq' or fob['status'] == 'revoked':
            continue
        fn = fob['submitted_file_name']
        bare_name = fn.strip('/')
        found = dxpy.find_one_data_object(
            name=bare_name,
            project='project-BQkYKg00F1GP55qQ9Qy00VP0')
        folder = dxpy.describe(found['id'])['folder']
        newfn = folder + '/' + bare_name
        # print(...) with a single argument produces the same output on
        # Python 2 and Python 3.
        print("Patch: %s with %s" % (fn, newfn))
        res = requests.patch(srv + fob['@id'],
                             auth=(id, pw),
                             data=json.dumps({'submitted_file_name': newfn}),
                             headers={'content-type': 'application/json'})
        try:
            res.raise_for_status()
            print("Success")
        except Exception as e:
            # Best effort: report the failure and carry on with the next
            # object.
            print("Failed %s" % e)
Esempio n. 57
0
def spp(experiment,
        control,
        xcor_scores,
        chrom_sizes,
        bigbed=False,
        as_file=None):
    """Run the ``spp`` applet from the current project context.

    Args:
        experiment: Passed to the applet as ``experiment``.
        control: Passed to the applet as ``control``.
        xcor_scores: Passed to the applet as ``xcor_scores_input``.
        chrom_sizes: Passed to the applet as ``chrom_sizes``.
        bigbed: Passed to the applet as ``bigbed``.
        as_file: Passed to the applet as ``as_file``, but only when both
            ``bigbed`` and ``as_file`` are truthy.

    Returns:
        Whatever the applet handler's ``run`` call returns.
    """
    applet = dxpy.find_one_data_object(
        classname='applet', name='spp',
        project=dxpy.PROJECT_CONTEXT_ID,
        zero_ok=False, more_ok=False,
        return_handler=True)

    applet_input = {
        "experiment": experiment,
        "control": control,
        "xcor_scores_input": xcor_scores,
        "bigbed": bigbed,
        "chrom_sizes": chrom_sizes
    }
    if bigbed and as_file:
        applet_input["as_file"] = as_file
    return applet.run(applet_input)
Esempio n. 58
0
def _find_consensus_files(pattern, project_id):
    """Map name to ID for the /ConsensusSequences files matching ``pattern``."""
    files = {}
    for result in dxpy.find_data_objects(
            name=pattern, name_mode="regexp", classname="file",
            folder="/ConsensusSequences", project=project_id):
        file_id = result["id"]
        files[dxpy.describe(file_id)["name"]] = file_id
    return files


def _print_run_command(cons1, cons2, outroot, skip, recals):
    """Print the ``dx run`` command line for one pair of consensus files."""
    print("dx run -y --folder /psmcfa -icons1=/ConsensusSequences/" + cons1
          + " -icons2=/ConsensusSequences/" + cons2
          + " -ioutroot=" + outroot
          + " -iskip=" + str(skip)
          + " -irecalnums=" + str(recals)
          + " PSMC-pipeline")


def main(pop1, pop2, skip=25, recals=2):
    """Print one ``dx run ... PSMC-pipeline`` command per pair of files.

    Consensus files are searched by regular expression in the
    ``/ConsensusSequences`` folder of a hard-coded project.  When ``pop1``
    and ``pop2`` are equal, every unordered pair of distinct ``pop1`` files
    is used; otherwise every ``pop1`` file is paired with every ``pop2``
    file.  Files are numbered from 1 in sorted-name order and the numbers go
    into the ``outroot`` of each command.

    Args:
        pop1: Regular expression (string) selecting the first population.
        pop2: Regular expression (string) selecting the second population.
        skip: Value printed as ``-iskip``.
        recals: Value printed as ``-irecalnums``.

    Returns:
        ``{}`` when the populations differ and no ``pop2`` file was found.
        Otherwise a dict with the lists ``psmcfaFiles`` and ``psmcFiles``;
        both are empty for now because jobs are only printed, not launched.
    """
    # Hard-coded project ID; the original note suggests it belongs to the
    # project named "PSMC_20" -- TODO confirm.
    psmc20_id = "project-B53fX06gYqYbb6B87kgQ0007"

    # The handler is not used while launching is disabled, but the lookup is
    # kept so a missing applet still makes the function fail up front.
    dxpy.find_one_data_object(
        name="PSMC-pipeline", name_mode="regexp", project=psmc20_id,
        return_handler=True)

    files1 = _find_consensus_files(pop1, psmc20_id)
    files2 = _find_consensus_files(pop2, psmc20_id) if pop1 != pop2 else {}
    if pop1 != pop2 and not files2:
        return {}

    # sorted() works on both Python 2 and 3, unlike dict.keys().sort().
    names1 = sorted(files1)
    if files2:
        # Two populations: full cross product.
        second_pop = pop2
        pairs = [(i, j, first, second)
                 for i, first in enumerate(names1)
                 for j, second in enumerate(sorted(files2))]
    else:
        # Single population: each unordered pair once (i < j).
        second_pop = pop1
        pairs = [(i, j, names1[i], names1[j])
                 for i in range(len(names1))
                 for j in range(i + 1, len(names1))]

    # To launch jobs instead of printing commands, run the applet here and
    # append the resulting job handlers to this list.
    appjobs = []
    for i, j, cons1, cons2 in pairs:
        outroot = pop1 + "." + str(i + 1) + "." + second_pop + "." + str(j + 1)
        _print_run_command(cons1, cons2, outroot, skip, recals)

    output = {
        "psmcfaFiles": [job.get_output_ref("outfile1") for job in appjobs],
        "psmcFiles": [job.get_output_ref("outfile2") for job in appjobs],
    }

    return output