def copy_file(orig_uuid):
    """Helper function that copies a file if given the original file's UUID

    :param orig_uuid: UUID of file to copy.
    :type orig_uuid: str.
    :returns: UUID of newly copied file, or None if the copy failed.
    """
    orig_fsi = read(orig_uuid)
    newfile_uuid = None
    try:
        # create() registers the duplicate; import_file() fetches the data
        newfile_uuid = create(
            orig_fsi.source, orig_fsi.sharename, orig_fsi.filetype,
            permanent=is_permanent(orig_uuid))
        import_file(newfile_uuid, refresh=True)
    except AttributeError as exc:
        # read() returns None for an unknown UUID, which surfaces here as
        # an AttributeError; log instead of failing silently
        logger.error("Failed to copy file with UUID '%s': %s",
                     orig_uuid, exc)
    return newfile_uuid
def addIGVSamples(fields, results_samp, annot_samples=None):
    """creates phenotype file for IGV

    :param fields: Solr field names to map into readable column labels
    :type fields: Array.
    :param results_samp: Solr results for samples to be included
    :type results_samp: Array.
    :param annot_samples: includes annotation files included with solr
        results
    :type annot_samples: Array
    :returns: full URL of the sample information file in the file store
    """
    # creates human readable indexes of fields to iterate over
    fields_dict = {}
    for field in fields:
        if field.find("_Characteristics_") > -1:
            fields_dict[field] = field.split("_Characteristics_")[0]
    # Creating temp file to enter into file_store
    tempsampname = tempfile.NamedTemporaryFile(delete=False)
    # writing header to sample file
    tempsampname.write("#sampleTable" + "\n")
    # writing column names to sample file (keys are unused here)
    col_names = "Linking_id"
    for label in fields_dict.itervalues():
        col_names = col_names + '\t' + label
    tempsampname.write(col_names + "\n")
    # iterating over sample files
    pheno_results = get_sample_lines(fields_dict, results_samp)
    try:
        tempsampname.write(pheno_results)
    except UnicodeEncodeError as e:
        # Solr results may contain non-ASCII characters; retry with an
        # explicit encoding (same handling as add_igv_samples)
        logger.error("Could not write results to file: %s. "
                     "Trying again with the content to write encoded "
                     "properly." % e)
        tempsampname.write(pheno_results.encode("utf-8"))
    # if annotations are not null
    if annot_samples:
        pheno_annot = get_sample_lines(fields_dict, annot_samples)
        tempsampname.write(pheno_annot)
    # closing temp file
    tempsampname.close()
    # getting file_store_uuid
    filestore_uuid = create(tempsampname.name, permanent=True,
                            filetype="txt")
    filestore_item = import_file(filestore_uuid, permanent=True,
                                 refresh=True)
    # file to rename
    temp_file = filestore_item.datafile.name.split('/')
    temp_file = temp_file[len(temp_file) - 1] + '.txt'
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_file)
    # getting file information based on file_uuids
    curr_fs = FileStoreItem.objects.get(uuid=filestore_uuid)
    # full path to selected UUID File
    curr_url = get_full_url(curr_fs.get_datafile_url())
    # delete temp file
    os.unlink(tempsampname.name)
    return curr_url
def copy_file(original_item_uuid):
    """Return the UUID of a new FileStoreItem copied from the given one.

    Returns None when the source item cannot be fetched or duplicated.
    """
    try:
        source_item = FileStoreItem.objects.get(uuid=original_item_uuid)
    except (FileStoreItem.DoesNotExist,
            FileStoreItem.MultipleObjectsReturned) as exc:
        logger.error("Failed to copy FileStoreItem with UUID '%s': %s",
                     original_item_uuid, exc)
        return None
    try:
        duplicate = FileStoreItem.objects.create(
            source=source_item.source,
            filetype=source_item.filetype)
    except AttributeError:
        return None
    # schedule the data file transfer for the duplicate
    import_file(duplicate.uuid, refresh=True)
    return duplicate.uuid
def copy_file(orig_uuid):
    """Copy the file identified by *orig_uuid*.

    :param orig_uuid: UUID of file to copy.
    :type orig_uuid: str.
    :returns: UUID of newly copied file (None on failure).
    """
    source_item = read(orig_uuid)
    new_uuid = None
    try:
        new_uuid = create(source_item.source,
                          source_item.sharename,
                          source_item.filetype,
                          permanent=is_permanent(orig_uuid))
        import_file(new_uuid, refresh=True)
    except AttributeError:
        # read() yields None for an unknown UUID; leave new_uuid as None
        pass
    return new_uuid
def copy_file(orig_uuid):
    """Helper function that copies a file if given the original file's UUID

    :param orig_uuid: UUID of file to copy.
    :type orig_uuid: str.
    :returns: UUID of newly copied file.
    """
    try:
        source_item = FileStoreItem.objects.get(uuid=orig_uuid)
    except (FileStoreItem.DoesNotExist,
            FileStoreItem.MultipleObjectsReturned) as e:
        logger.error("Couldn't properly fetch FileStoreItem: %s", e)
        return None
    new_uuid = None
    try:
        new_uuid = create(source_item.source,
                          source_item.sharename,
                          source_item.filetype,
                          permanent=is_permanent(orig_uuid))
        import_file(new_uuid, refresh=True)
    except AttributeError:
        pass
    return new_uuid
def run(self, path, isa_archive=None, preisa_archive=None):
    """Parse an ISArchive and return the resulting investigation.

    If path is a file it will be treated as an ISArchive, if it is a
    directory it will be treated as an extracted ISArchive. Assumes that
    the archive extracts into a subdirectory named <archive> if the
    ISArchive is called <archive>.zip.

    :param path: ISArchive file or directory of an extracted ISArchive.
    :param isa_archive: optional ISA-Tab archive to attach to the parsed
        investigation (set to *path* automatically when *path* is a file).
    :param preisa_archive: optional pre-ISA-Tab archive to attach.
    :returns: the parsed investigation.
    :raises IndexError: if no investigation (i*.txt) file is found.
    :raises Exception: if parsing yields no investigation.
    """
    # reset all parser state from any previous run
    self._current_investigation = None
    self._current_study = None
    self._current_assay = None
    self._current_node = None
    self._previous_node = None
    self._current_attribute = None
    self._current_protocol_reference = None
    self._current_reader = None
    self._current_file = None
    self._current_file_name = None
    # 1. test if archive needs to be extracted and extract if necessary
    if not os.path.isdir(path):
        # assign to isa_archive if it's an archive anyway
        isa_archive = path
        logger.info(
            "Supplied path \"" + path + "\" is not a directory. Assuming "
            "ISArchive file.")
        try:
            # TODO: do we need a random subdirectory here?
            extract_path = tempfile.mkdtemp()
            with ZipFile(path, 'r') as zip:
                # refuse archives with relative or absolute entry paths
                # that would escape the extract path ("Zip Slip");
                # previously this was only logged and extraction went
                # ahead regardless
                for name in zip.namelist():
                    if name.startswith("..") or name.startswith("/"):
                        logger.error(
                            "Unable to extract assumed ISArchive file \"" +
                            path + "\" due to illegal file path: " + name)
                        raise ValueError(
                            "Illegal file path in archive: " + name)
                # extract archive
                zip.extractall(extract_path)
                first_file = zip.namelist()[0]
                # test if first entry in zip file is a path
                if first_file.endswith("/"):
                    # add archive subdirectory to path
                    extract_path = os.path.join(extract_path, first_file)
                elif re.search(r'/', first_file):
                    # entry is "<dir>/<file>": keep the leading directory
                    ind = string.find(first_file, '/')
                    extract_path = os.path.join(
                        extract_path, first_file[:ind])
                logger.info(
                    "ISArchive extracted to \"" + extract_path + "\".")
                path = extract_path
        except Exception:
            # narrowed from a bare except; extraction is best-effort and
            # failures (including rejected archives) fall through to the
            # investigation-file lookup below
            logger.exception(
                "Unable to extract assumed ISArchive file \"" +
                path + "\".")
    # 2. identify investigation file
    try:
        investigation_file_name = glob.glob("%s/i*.txt" % path).pop()
    except IndexError as exception:
        logger.exception(
            "Unable to identify ISArchive file in \"" + path + "\".")
        raise exception
    # 3. parse investigation file and identify study files and
    # corresponding assay files
    self._parse_investigation_file(investigation_file_name)
    # 4. parse all study files and corresponding assay files
    if self._current_investigation is not None:
        # identify studies associated with this investigation
        for study in self._current_investigation.study_set.all():
            # parse study file
            self._current_assay = None
            study_file_name = os.path.join(path, study.file_name)
            if data_set_manager.tasks.fix_last_col(study_file_name):
                self._parse_study_file(study, study_file_name)
                for assay in study.assay_set.all():
                    # parse assay file
                    self._previous_node = None
                    assay_file_name = os.path.join(path, assay.file_name)
                    if data_set_manager.tasks.fix_last_col(
                            assay_file_name):
                        self._parse_assay_file(
                            study, assay, assay_file_name)
    else:
        logger.error(
            "No investigation was identified when parsing investigation "
            "file \"" + investigation_file_name + "\"")
        raise Exception()
    # 5. assign ISA-Tab archive and pre-ISA-Tab archive if present
    try:
        # isa_archive may be None when a directory was supplied; attaching
        # the archive is best-effort
        self._current_investigation.isarchive_file = create(isa_archive)
        import_file(self._current_investigation.isarchive_file,
                    refresh=True)
    except Exception:
        pass
    if preisa_archive:
        self._current_investigation.pre_isarchive_file = \
            create(preisa_archive)
        import_file(self._current_investigation.pre_isarchive_file,
                    refresh=True)
    self._current_investigation.save()
    return self._current_investigation
def run(self, path, isa_archive=None, preisa_archive=None):
    """If path is a file it will be treated as an ISArchive, if it is a
    directory it will be treated as an extracted ISArchive. Assumes that
    the archive extracts into a subdirectory named <archive> if the
    ISArchive is called <archive>.zip.

    :param path: ISArchive file or directory of an extracted ISArchive.
    :param isa_archive: optional ISA-Tab archive to attach to the parsed
        investigation (overwritten with *path* when *path* is a file).
    :param preisa_archive: optional pre-ISA-Tab archive to attach.
    :returns: the parsed investigation.
    """
    # reset all variables
    self._current_investigation = None
    self._current_study = None
    self._current_assay = None
    self._current_node = None
    self._previous_node = None
    self._current_attribute = None
    self._current_protocol_reference = None
    self._current_reader = None
    self._current_file = None
    self._current_file_name = None
    # 1. test if archive needs to be extracted and extract if necessary
    if not os.path.isdir(path):
        # assign to isa_archive if it's an archive anyway
        isa_archive = path
        logger.info("Supplied path \"" + path +
                    "\" is not a directory. Assuming "
                    "ISArchive file.")
        try:
            # TODO: do we need a random subdirectory here?
            extract_path = tempfile.mkdtemp()
            with ZipFile(path, 'r') as zip:
                # test if any paths are relative or absolute and outside
                # the extract path
                # NOTE(review): the offending entry is only logged;
                # extractall() below still runs, so a crafted archive can
                # write outside extract_path ("Zip Slip") — this should
                # abort extraction instead
                for name in zip.namelist():
                    if name.startswith("..") or name.startswith("/"):
                        # NOTE(review): logger.exception outside an except
                        # block logs "NoneType: None" as the traceback
                        logger.exception(
                            "Unable to extract assumed ISArchive file \"" +
                            path + "\" due to illegal file path: " + name)
                # extract archive
                zip.extractall(extract_path)
                first_file = zip.namelist()[0]
                # test if first entry in zip file is a path
                if first_file.endswith("/"):
                    # add archive subdirectory to path
                    extract_path = os.path.join(extract_path, first_file)
                elif re.search(r'/', first_file):
                    # entry is "<dir>/<file>": keep only the leading
                    # directory component
                    ind = string.find(first_file, '/')
                    extract_path = os.path.join(extract_path,
                                                first_file[:ind])
                logger.info("ISArchive extracted to \"" + extract_path +
                            "\".")
                path = extract_path
        except:
            # NOTE(review): bare except hides unrelated failures; narrow
            # to the exceptions ZipFile/extractall can actually raise
            logger.exception(
                "Unable to extract assumed ISArchive file \"" + path +
                "\".")
    # 2. identify investigation file
    try:
        investigation_file_name = glob.glob("%s/i*.txt" % path).pop()
    except IndexError as exception:
        logger.exception("Unable to identify ISArchive file in \"" +
                         path + "\".")
        raise exception
    # 3. parse investigation file and identify study files and
    # corresponding assay files
    self._parse_investigation_file(investigation_file_name)
    # 4. parse all study files and corresponding assay files
    if self._current_investigation is not None:
        # identify studies associated with this investigation
        for study in self._current_investigation.study_set.all():
            # parse study file
            self._current_assay = None
            study_file_name = os.path.join(path, study.file_name)
            if data_set_manager.tasks.fix_last_col(study_file_name):
                self._parse_study_file(study, study_file_name)
                for assay in study.assay_set.all():
                    # parse assay file
                    self._previous_node = None
                    assay_file_name = os.path.join(path, assay.file_name)
                    if data_set_manager.tasks.fix_last_col(
                            assay_file_name):
                        self._parse_assay_file(study, assay,
                                               assay_file_name)
    else:
        # NOTE(review): logger.exception outside an except block again
        logger.exception(
            "No investigation was identified when parsing investigation "
            "file \"" + investigation_file_name + "\"")
        raise Exception()
    # 5. assign ISA-Tab archive and pre-ISA-Tab archive if present
    try:
        # isa_archive may be None when a directory was supplied, in which
        # case create() fails and the assignment is skipped
        self._current_investigation.isarchive_file = create(isa_archive)
        import_file(self._current_investigation.isarchive_file,
                    refresh=True)
    except:
        # NOTE(review): bare except silently discards any failure here
        pass
    if preisa_archive:
        self._current_investigation.pre_isarchive_file = \
            create(preisa_archive)
        import_file(self._current_investigation.pre_isarchive_file,
                    refresh=True)
    self._current_investigation.save()
    return self._current_investigation
def create_igv_session(genome, uuids, is_file_uuid=False):
    """ Creates session file for selected file uuids, returns newly created
    filestore uuid

    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: Array of UUIDs to be used
    :type uuids: array.
    :param is_file_uuid: True when *uuids* are file store UUIDs rather
        than node UUIDs.
    :returns: IGV Java Webstart URL pointing at the new session file.
    """
    # Example session file layout
    # (see http://www.postneo.com/projects/pyxml/):
    # <?xml version="1.0" encoding="UTF-8"?>
    # <Global genome="hg18" locus="EGFR" version="3">
    #     <Resources>
    #         <Resource name="RNA Genes" path=".../rna_genes.bed"/>
    #     </Resources>
    # </Global>
    logger.debug("visualization_manager.create_igv_session called")
    # Create the minidom document
    doc = Document()
    # Create the <Global> base element
    xml = doc.createElement("Global")
    xml.setAttribute("genome", genome)
    xml.setAttribute("locus", "All")
    xml.setAttribute("version", "4")
    doc.appendChild(xml)
    # Add Resources
    xml_resources = doc.createElement("Resources")
    xml.appendChild(xml_resources)
    # get paths to url
    for samp in uuids:
        # gets filestore item
        curr_name, curr_url = get_file_name(samp, is_file_uuid=is_file_uuid)
        # lazy %s formatting: string concatenation raised TypeError when
        # get_file_name() returned None for a missing filestore item
        logger.debug('New resource: %s - %s', curr_name, curr_url)
        if curr_name:
            # creates Resource element
            res = doc.createElement("Resource")
            res.setAttribute("name", curr_name)
            res.setAttribute("path", curr_url)
            xml_resources.appendChild(res)
    # Creating temp file to enter into file_store
    tempfilename = tempfile.NamedTemporaryFile(delete=False)
    tempfilename.write(doc.toprettyxml(indent=" "))
    tempfilename.close()
    # getting file_store_uuid
    filestore_uuid = create(tempfilename.name, filetype="xml")
    filestore_item = import_file(filestore_uuid, refresh=True)
    # file to rename
    temp_name = filestore_item.datafile.name.split('/')
    temp_name = temp_name[len(temp_name) - 1] + '.xml'
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_name)
    # delete temp file
    os.unlink(tempfilename.name)
    # Url for session file
    fs_url = get_full_url(filestore_item.get_datafile_url())
    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php" \
              "?sessionURL=" + fs_url
    return igv_url
def add_igv_samples(fields, results_samp, annot_samples=None):
    """Build the IGV phenotype (sample information) file.

    :param fields: Solr field names to map into readable column labels
    :param results_samp: Solr results for samples to be included
    :param annot_samples: annotation files included with solr results
    :returns: full URL of the sample file placed in the file store
    """
    # map each "<label>_Characteristics_<suffix>" field to its label
    fields_dict = {}
    for field in fields:
        if field.find("_Characteristics_") > -1:
            fields_dict[field] = field.split("_Characteristics_")[0]
    # stage the phenotype table in a temp file before file_store import
    sample_file = tempfile.NamedTemporaryFile(delete=False)
    sample_file.write("#sampleTable" + "\n")
    # header row: linking id followed by the readable column labels
    col_names = "Linking_id"
    for k, v in fields_dict.iteritems():
        col_names = col_names + '\t' + v
    sample_file.write(col_names + "\n")
    # one line per sample from the Solr results
    pheno_results = get_sample_lines(fields_dict, results_samp)
    try:
        sample_file.write(pheno_results)
    except UnicodeEncodeError as e:
        logger.error("Could not write results to file: %s. "
                     "Trying again with the content to write encoded "
                     "properly." % e)
        sample_file.write(pheno_results.encode("utf-8"))
    # append annotation sample lines when provided
    if annot_samples:
        sample_file.write(get_sample_lines(fields_dict, annot_samples))
    sample_file.close()
    # hand the staged file over to the file store and import it
    filestore_uuid = create(sample_file.name, filetype="txt")
    filestore_item = import_file(filestore_uuid, refresh=True)
    # rename the stored file after the generated data file, adding .txt
    stored_name = filestore_item.datafile.name.split('/')[-1] + '.txt'
    filestore_item = rename(filestore_uuid, stored_name)
    # resolve the final URL of the stored file
    stored_item = FileStoreItem.objects.get(uuid=filestore_uuid)
    sample_url = get_full_url(stored_item.get_datafile_url())
    # remove the staging file
    os.unlink(sample_file.name)
    return sample_url
def create_igv_session_annot(genome, uuids, annot_uuids=None, samp_file=None):
    """Creates session file for selected file uuids, returns newly created
    filestore uuid

    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: dict holding the node UUIDs to add as resources
    :param annot_uuids: dict holding annotation node UUIDs, if any
    :param samp_file: URL of the sample information file, if any
    """
    # Session file layout (see http://www.postneo.com/projects/pyxml/):
    # <?xml version="1.0" encoding="UTF-8"?>
    # <Global genome="hg18" locus="EGFR" version="3">
    #     <Resources>
    #         <Resource name="..." path="..."/>
    #     </Resources>
    # </Global>
    doc = Document()
    # root element of the session document
    session_root = doc.createElement("Global")
    session_root.setAttribute("genome", genome)
    session_root.setAttribute("locus", "All")
    session_root.setAttribute("version", "4")
    doc.appendChild(session_root)
    resource_container = doc.createElement("Resources")
    session_root.appendChild(resource_container)
    # resources for the selected samples
    add_igv_resource(uuids["node_uuid"], resource_container, doc)
    if annot_uuids:
        # resources for the selected annotations
        add_igv_resource(annot_uuids["node_uuid"], resource_container, doc)
    if samp_file:
        # attach the sample information file to the session, e.g.
        # <Resource name="Sample Information" path=".../sampleTable.txt.gz"/>
        samp_res = doc.createElement("Resource")
        samp_res.setAttribute("name", "Sample Information")
        samp_res.setAttribute("path", samp_file)
        resource_container.appendChild(samp_res)
    # hide basic unnecessary sample info columns in IGV:
    # <HiddenAttributes><Attribute name="..."/></HiddenAttributes>
    hidden_attr = doc.createElement("HiddenAttributes")
    session_root.appendChild(hidden_attr)
    for hidden_name in ("DATA FILE", "Linking_id", "DATA TYPE"):
        attr = doc.createElement("Attribute")
        attr.setAttribute("name", hidden_name)
        hidden_attr.appendChild(attr)
    # stage the session XML in a temp file for the file store
    session_file = tempfile.NamedTemporaryFile(delete=False)
    session_file.write(doc.toprettyxml(indent=" "))
    session_file.close()
    # import the staged file into the file store
    filestore_uuid = create(session_file.name, filetype="xml")
    filestore_item = import_file(filestore_uuid, refresh=True)
    # rename the stored file after the data file name, adding .xml
    stored_name = filestore_item.datafile.name.split('/')[-1] + '.xml'
    filestore_item = rename(filestore_uuid, stored_name)
    # remove the staging file
    os.unlink(session_file.name)
    # Url for session file
    sessionfile_url = get_full_url(filestore_item.get_datafile_url())
    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php" \
              "?sessionURL=" + sessionfile_url
    return igv_url
def _parse_file(self, file_name ):
    """Parse a single tab-separated metadata file into an investigation,
    study and assay plus one node per row, with attributes taken from the
    remaining columns.

    :param file_name: path of the metadata file to parse.
    :returns: the newly created investigation.
    """
    try:
        self._current_file = open( file_name, "rU" )
        self._current_reader = csv.reader( self._current_file,
                                           dialect="excel-tab",
                                           delimiter=self.delimiter )
    except:
        # NOTE(review): bare except, and parsing continues even though
        # self._current_reader may be unset — the header read below will
        # then fail with an unrelated error; consider re-raising here
        logger.exception( "Unable to read file " +
                          str( self._current_file ) + "." )
    # create investigation, study and assay objects
    investigation = self._create_investigation()
    study = self._create_study( investigation=investigation,
                                file_name=file_name )
    assay = self._create_assay( study=study, file_name=file_name )
    #import in file as "pre-isa" file
    logger.info('trying to add pre-isa archive file %s' % file_name)
    investigation.pre_isarchive_file = create(file_name, permanent=True)
    import_file(investigation.pre_isarchive_file, refresh=True,
                permanent=True)
    investigation.save()
    # read column headers
    # NOTE(review): the empty-list assignment is dead — immediately
    # overwritten by the first row of the reader
    headers = []
    headers = self._current_reader.next()
    # compute absolute file_column_index (in case a negative value was
    # provided)
    if self.file_column_index >= 0:
        internal_file_column_index = self.file_column_index
    else:
        internal_file_column_index = len( headers ) + self.file_column_index
    # compute absolute auxiliary_file_column_index (in case a negative
    # value was provided)
    if self.auxiliary_file_column_index is not None:
        if self.auxiliary_file_column_index >= 0:
            internal_auxiliary_file_column_index = \
                self.auxiliary_file_column_index
        else:
            internal_auxiliary_file_column_index = \
                len( headers ) + self.auxiliary_file_column_index
    else:
        internal_auxiliary_file_column_index = None
    # TODO: test if there are fewer columns than required
    logger.debug( "Parsing with file column %s and auxiliary file column %s."
                  % ( internal_file_column_index,
                      internal_auxiliary_file_column_index ) )
    # iterate over non-header rows in file
    for row in self._current_reader:
        # TODO: resolve relative indices
        internal_source_column_index = self.source_column_index
        internal_sample_column_index = self.sample_column_index
        internal_assay_column_index = self.assay_column_index
        # add data file to file store
        file_uuid = None
        if self.file_base_path is None:
            file_path = row[internal_file_column_index].strip()
        else:
            file_path = os.path.join(
                self.file_base_path,
                row[internal_file_column_index].strip() )
        file_uuid = create( source=file_path,
                            permanent=self.file_permanent )
        if file_uuid is not None:
            logger.debug( "Added data file " + file_path +
                          " to file store." )
        else:
            # NOTE(review): logger.exception outside an except block logs
            # "NoneType: None" as the traceback
            logger.exception( "Unable to add data file " + file_path +
                              " to file store." )
        # add auxiliary file to file store
        auxiliary_file_uuid = None
        if internal_auxiliary_file_column_index is not None:
            if self.file_base_path is None:
                auxiliary_file_path = \
                    row[internal_auxiliary_file_column_index].strip()
            else:
                auxiliary_file_path = os.path.join(
                    self.file_base_path,
                    row[internal_auxiliary_file_column_index].strip() )
            auxiliary_file_uuid = create( source=auxiliary_file_path,
                                          permanent=self.file_permanent )
            if auxiliary_file_uuid is not None:
                logger.debug( "Added auxiliary file " +
                              auxiliary_file_path + " to file store." )
            else:
                # NOTE(review): message reports file_path instead of
                # auxiliary_file_path
                logger.exception( "Unable to add auxiliary file " +
                                  file_path + " to file store." )
        # add files to file server
        file_server.models.add( file_uuid, auxiliary_file_uuid );
        # create nodes if file was successfully created
        # source node
        source_name = self._create_name(row, internal_source_column_index,
                                        internal_file_column_index)
        source_node, is_source_new = Node.objects.get_or_create(
            study=study, name=source_name, type=Node.SOURCE )
        # sample node
        sample_name = self._create_name(row, internal_sample_column_index,
                                        internal_file_column_index)
        sample_node, is_sample_new = Node.objects.get_or_create(
            study=study, name=sample_name, type=Node.SAMPLE )
        source_node.add_child( sample_node )
        # assay node
        assay_name = self._create_name(row, internal_assay_column_index,
                                       internal_file_column_index)
        assay_node, is_assay_new = Node.objects.get_or_create(
            study=study, assay=assay, name=assay_name, type=Node.ASSAY )
        sample_node.add_child( assay_node )
        # data file node linked to the assay node
        file_node = Node.objects.create(
            study=study, assay=assay,
            name=row[internal_file_column_index].strip(),
            file_uuid=file_uuid, type=Node.RAW_DATA_FILE,
            species=self._get_species( row ),
            genome_build=self._get_genome_build( row ),
            is_annotation=self._is_annotation( row ) )
        assay_node.add_child( file_node )
        # iterate over columns to create attributes to attach to the
        # sample node
        for column_index in range( 0, len( row ) ):
            # skip data file column
            if ( internal_file_column_index == column_index ) or \
               ( internal_auxiliary_file_column_index == column_index ) or \
               ( self.annotation_column_index == column_index ):
                continue
            # create attribute as characteristic and attach to sample
            # node if the sample node was newly created
            if is_sample_new:
                attribute = Attribute.objects.create(
                    node=sample_node, type=Attribute.CHARACTERISTICS,
                    subtype=headers[column_index].strip().lower(),
                    value=row[column_index].strip() )
    return investigation
def createIGVsession(genome, uuids, is_file_uuid=False):
    """ Creates session file for selected file uuids, returns newly created
    filestore uuid

    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: Array of UUIDs to be used
    :type uuids: array.
    :param is_file_uuid: True when *uuids* are file store UUIDs rather
        than node UUIDs.
    :returns: IGV Java Webstart URL pointing at the new session file.
    """
    # Example session file layout
    # (see http://www.postneo.com/projects/pyxml/):
    # <?xml version="1.0" encoding="UTF-8"?>
    # <Global genome="hg18" locus="EGFR" version="3">
    #     <Resources>
    #         <Resource name="RNA Genes" path=".../rna_genes.bed"/>
    #     </Resources>
    # </Global>
    logger.debug("visualization_manager.createIGVsession called")
    # Create the minidom document
    doc = Document()
    # Create the <Global> base element
    xml = doc.createElement("Global")
    xml.setAttribute("genome", genome)
    xml.setAttribute("locus", "All")
    xml.setAttribute("version", "4")
    doc.appendChild(xml)
    # Add Resources
    xml_resources = doc.createElement("Resources")
    xml.appendChild(xml_resources)
    # get paths to url
    for samp in uuids:
        # gets filestore item
        curr_name, curr_url = get_file_name(samp, is_file_uuid=is_file_uuid)
        # lazy %s formatting: string concatenation raised TypeError when
        # get_file_name() returned None for a missing filestore item
        logger.debug('New resource: %s - %s', curr_name, curr_url)
        if curr_name:
            # creates Resource element
            res = doc.createElement("Resource")
            res.setAttribute("name", curr_name)
            res.setAttribute("path", curr_url)
            xml_resources.appendChild(res)
    # Creating temp file to enter into file_store
    tempfilename = tempfile.NamedTemporaryFile(delete=False)
    tempfilename.write(doc.toprettyxml(indent=" "))
    tempfilename.close()
    # getting file_store_uuid
    filestore_uuid = create(tempfilename.name, permanent=True,
                            filetype="xml")
    filestore_item = import_file(filestore_uuid, permanent=True,
                                 refresh=True)
    # file to rename
    temp_name = filestore_item.datafile.name.split('/')
    temp_name = temp_name[len(temp_name) - 1] + '.xml'
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_name)
    # delete temp file
    os.unlink(tempfilename.name)
    # Url for session file
    fs_url = filestore_item.get_full_url()
    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php?sessionURL=" + fs_url
    return igv_url
def addIGVSamples(fields, results_samp, annot_samples=None):
    """ creates phenotype file for IGV

    :param fields: Solr field names to map into readable column labels
    :type fields: Array.
    :param results_samp: Solr results for samples to be included
    :type results_samp: Array.
    :param annot_samples: includes annotation files included with solr
        results
    :type annot_samples: Array
    :returns: full URL of the newly created sample file
    """
    # creates human readable indexes of fields to iterate over
    fields_dict = {}
    for i in fields:
        find_index = i.find("_Characteristics_")
        if find_index > -1:
            new_key = i.split("_Characteristics_")[0]
            fields_dict[i] = new_key
    # Creating temp file to enter into file_store
    tempsampname = tempfile.NamedTemporaryFile(delete=False)
    # writing header to sample file
    tempsampname.write("#sampleTable" + "\n")
    # writing column names to sample file (only the labels are needed)
    col_names = "Linking_id"
    for v in fields_dict.itervalues():
        col_names = col_names + '\t' + v
    tempsampname.write(col_names + "\n")
    # iterating over sample files
    pheno_results = get_sample_lines(fields_dict, results_samp)
    tempsampname.write(pheno_results)
    # if annotations are not null
    if annot_samples:
        pheno_annot = get_sample_lines(fields_dict, annot_samples)
        tempsampname.write(pheno_annot)
    # closing temp file
    tempsampname.close()
    # getting file_store_uuid
    filestore_uuid = create(tempsampname.name, permanent=True,
                            filetype="txt")
    filestore_item = import_file(filestore_uuid, permanent=True,
                                 refresh=True)
    # file to rename
    temp_file = filestore_item.datafile.name.split('/')
    temp_file = temp_file[len(temp_file) - 1] + '.txt'
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_file)
    # getting file information based on file_uuids
    # (the unused curr_name local was removed)
    curr_fs = FileStoreItem.objects.get(uuid=filestore_uuid)
    # full path to selected UUID File
    curr_url = curr_fs.get_full_url()
    # delete temp file
    os.unlink(tempsampname.name)
    return curr_url
def createIGVsessionAnnot(genome, uuids, annot_uuids=None, samp_file=None):
    """ Creates session file for selected file uuids, returns newly created
    filestore uuid

    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: dict holding the node UUIDs to add as resources
    :param annot_uuids: dict holding annotation node UUIDs, if any
    :param samp_file: URL of the sample information file, if any
    """
    # Session file layout (see http://www.postneo.com/projects/pyxml/):
    # <?xml version="1.0" encoding="UTF-8"?>
    # <Global genome="hg18" locus="EGFR" version="3">
    #     <Resources>
    #         <Resource name="..." path="..."/>
    #     </Resources>
    # </Global>
    doc = Document()
    # root element of the session document
    session_root = doc.createElement("Global")
    session_root.setAttribute("genome", genome)
    session_root.setAttribute("locus", "All")
    session_root.setAttribute("version", "4")
    doc.appendChild(session_root)
    resource_container = doc.createElement("Resources")
    session_root.appendChild(resource_container)
    # adding selected samples to xml file
    addIGVResource(uuids["node_uuid"], resource_container, doc)
    if annot_uuids:
        # adding selected annotations to xml file
        addIGVResource(annot_uuids["node_uuid"], resource_container, doc)
    if samp_file:
        # adds sample information file to IGV session file, e.g.
        # <Resource name="Sample Information" path=".../sampleTable.txt.gz"/>
        samp_res = doc.createElement("Resource")
        samp_res.setAttribute("name", "Sample Information")
        samp_res.setAttribute("path", samp_file)
        resource_container.appendChild(samp_res)
    # hide basic unnecessary sample info columns in IGV:
    # <HiddenAttributes><Attribute name="..."/></HiddenAttributes>
    hidden_attr = doc.createElement("HiddenAttributes")
    session_root.appendChild(hidden_attr)
    for hidden_name in ("DATA FILE", "Linking_id", "DATA TYPE"):
        attr = doc.createElement("Attribute")
        attr.setAttribute("name", hidden_name)
        hidden_attr.appendChild(attr)
    # stage the session XML in a temp file for the file store
    session_file = tempfile.NamedTemporaryFile(delete=False)
    session_file.write(doc.toprettyxml(indent=" "))
    session_file.close()
    # import the staged file into the file store
    filestore_uuid = create(session_file.name, permanent=True,
                            filetype="xml")
    filestore_item = import_file(filestore_uuid, permanent=True,
                                 refresh=True)
    # rename the stored file after the data file name, adding .xml
    stored_name = filestore_item.datafile.name.split('/')[-1] + '.xml'
    filestore_item = rename(filestore_uuid, stored_name)
    # remove the staging file
    os.unlink(session_file.name)
    # Url for session file
    fs_url = filestore_item.get_full_url()
    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php?sessionURL=" + fs_url
    return igv_url
def run(self):
    """Parse the metadata file into an investigation/study/assay graph.

    Creates one source -> sample -> assay -> file node chain per metadata
    row, registers each referenced data file (and optional auxiliary file)
    with the file store, and kicks off asynchronous import tasks for all
    registered files once parsing has finished.

    :returns: the newly created investigation object.
    """
    # create investigation, study and assay objects
    investigation = self._create_investigation()
    # FIXME: self.metadata_file.name may not be informative, especially in
    # case of temp files that don't exist on disk
    study = self._create_study(investigation=investigation,
                               file_name=self.metadata_file.name)
    assay = self._create_assay(study=study,
                               file_name=self.metadata_file.name)
    # import in file as "pre-isa" file
    logger.info("trying to add pre-isa archive file %s",
                self.metadata_file.name)
    # FIXME: this will not create a FileStoreItem if self.metadata_file
    # does not exist on disk (e.g., a file object like TemporaryFile)
    investigation.pre_isarchive_file = create(
        self.metadata_file.name, permanent=True)
    import_file(investigation.pre_isarchive_file, refresh=True)
    investigation.save()
    # TODO: test if there are fewer columns than required
    logger.debug("Parsing with file column %s and "
                 "auxiliary file column %s",
                 self.file_column_index, self.auxiliary_file_column_index)
    # UUIDs of data files to postpone importing until parsing is finished
    data_files = []
    # iterate over non-header rows in file
    for row in self.metadata_reader:
        # TODO: resolve relative indices
        internal_source_column_index = self.source_column_index
        internal_sample_column_index = self.sample_column_index
        internal_assay_column_index = self.assay_column_index
        # add data file to file store
        data_file_path = self.file_source_translator(
            row[self.file_column_index])
        data_file_uuid = create(
            source=data_file_path, permanent=self.file_permanent)
        data_files.append(data_file_uuid)
        # add auxiliary file to file store
        # NOTE(review): truthiness check, so an auxiliary file column at
        # index 0 would be treated as absent — confirm this is intended
        if self.auxiliary_file_column_index:
            auxiliary_file_path = self.file_source_translator(
                row[self.auxiliary_file_column_index])
            auxiliary_file_uuid = create(
                source=auxiliary_file_path, permanent=self.file_permanent)
            data_files.append(auxiliary_file_uuid)
        else:
            auxiliary_file_uuid = None
        # add files to file server
        # TODO: add error handling in case of None values for UUIDs
        file_server.models.add(data_file_uuid, auxiliary_file_uuid)
        # create nodes if file was successfully created
        # source node (get_or_create so repeated names share one node)
        source_name = self._create_name(
            row, internal_source_column_index, self.file_column_index)
        source_node, is_source_new = Node.objects.get_or_create(
            study=study, name=source_name, type=Node.SOURCE)
        # sample node
        sample_name = self._create_name(
            row, internal_sample_column_index, self.file_column_index)
        sample_node, is_sample_new = Node.objects.get_or_create(
            study=study, name=sample_name, type=Node.SAMPLE)
        source_node.add_child(sample_node)
        # assay node
        assay_name = self._create_name(
            row, internal_assay_column_index, self.file_column_index)
        assay_node, is_assay_new = Node.objects.get_or_create(
            study=study, assay=assay, name=assay_name, type=Node.ASSAY)
        sample_node.add_child(assay_node)
        # leaf node holding the file store UUID of the row's data file
        file_node = Node.objects.create(
            study=study, assay=assay,
            name=row[self.file_column_index].strip(),
            file_uuid=data_file_uuid, type=Node.RAW_DATA_FILE,
            species=self._get_species(row),
            genome_build=self._get_genome_build(row),
            is_annotation=self._is_annotation(row))
        assay_node.add_child(file_node)
        # iterate over columns to create attributes to attach to sample
        # node
        for column_index in range(0, len(row)):
            # skip data file column
            if (self.file_column_index == column_index or
                    self.auxiliary_file_column_index == column_index or
                    self.annotation_column_index == column_index):
                continue
            # create attribute as characteristic and attach to sample node
            # if the sample node was newly created
            if is_sample_new:
                Attribute.objects.create(
                    node=sample_node, type=Attribute.CHARACTERISTICS,
                    subtype=self.headers[column_index].strip().lower(),
                    value=row[column_index].strip()
                )
    # kick off data file importing tasks (deferred until parsing is done)
    for uuid in data_files:
        import_file.delay(uuid)
    return investigation
def run(self):
    """Parse the metadata file into an investigation/study/assay graph.

    Creates one source -> sample -> assay -> file node chain per metadata
    row and registers each referenced data file (and optional auxiliary
    file) with the file store. Unlike the variant that always schedules
    imports, this version only kicks off remote import tasks for files
    that are flagged permanent or whose source is a local upload / S3 URL.

    :returns: the newly created investigation object.
    """
    # create investigation, study and assay objects
    investigation = self._create_investigation()
    # FIXME: self.metadata_file.name may not be informative, especially in
    # case of temp files that don't exist on disk
    study = self._create_study(investigation=investigation,
                               file_name=self.metadata_file.name)
    assay = self._create_assay(study=study,
                               file_name=self.metadata_file.name)
    # import in file as "pre-isa" file
    logger.info("trying to add pre-isa archive file %s",
                self.metadata_file.name)
    # FIXME: this will not create a FileStoreItem if self.metadata_file
    # does not exist on disk (e.g., a file object like TemporaryFile)
    investigation.pre_isarchive_file = create(self.metadata_file.name)
    import_file(investigation.pre_isarchive_file, refresh=True)
    investigation.save()
    # TODO: test if there are fewer columns than required
    logger.debug(
        "Parsing with file column %s and "
        "auxiliary file column %s",
        self.file_column_index, self.auxiliary_file_column_index)
    # UUIDs of data files to postpone importing until parsing is finished
    data_files = []
    # iterate over non-header rows in file
    for row in self.metadata_reader:
        # TODO: resolve relative indices
        internal_source_column_index = self.source_column_index
        internal_sample_column_index = self.sample_column_index
        internal_assay_column_index = self.assay_column_index
        # add data file to file store
        data_file_path = self.file_source_translator(
            row[self.file_column_index])
        data_file_uuid = create(source=data_file_path)
        data_files.append(data_file_uuid)
        # add auxiliary file to file store
        # NOTE(review): truthiness check, so an auxiliary file column at
        # index 0 would be treated as absent — confirm this is intended
        if self.auxiliary_file_column_index:
            auxiliary_file_path = self.file_source_translator(
                row[self.auxiliary_file_column_index])
            auxiliary_file_uuid = create(source=auxiliary_file_path)
            data_files.append(auxiliary_file_uuid)
        else:
            auxiliary_file_uuid = None
        # add files to file server
        # TODO: add error handling in case of None values for UUIDs
        file_server.models.add(data_file_uuid, auxiliary_file_uuid)
        # create nodes if file was successfully created
        # source node (get_or_create so repeated names share one node)
        source_name = self._create_name(row, internal_source_column_index,
                                        self.file_column_index)
        source_node, is_source_new = Node.objects.get_or_create(
            study=study, name=source_name, type=Node.SOURCE)
        # sample node
        sample_name = self._create_name(row, internal_sample_column_index,
                                        self.file_column_index)
        sample_node, is_sample_new = Node.objects.get_or_create(
            study=study, name=sample_name, type=Node.SAMPLE)
        source_node.add_child(sample_node)
        # assay node
        assay_name = self._create_name(row, internal_assay_column_index,
                                       self.file_column_index)
        assay_node, is_assay_new = Node.objects.get_or_create(
            study=study, assay=assay, name=assay_name, type=Node.ASSAY)
        sample_node.add_child(assay_node)
        # leaf node holding the file store UUID of the row's data file
        file_node = Node.objects.create(
            study=study, assay=assay,
            name=row[self.file_column_index].strip(),
            file_uuid=data_file_uuid, type=Node.RAW_DATA_FILE,
            species=self._get_species(row),
            genome_build=self._get_genome_build(row),
            is_annotation=self._is_annotation(row))
        assay_node.add_child(file_node)
        # iterate over columns to create attributes to attach to sample
        # node
        for column_index in range(0, len(row)):
            # skip data file column
            if (self.file_column_index == column_index or
                    self.auxiliary_file_column_index == column_index or
                    self.annotation_column_index == column_index):
                continue
            # create attribute as characteristic and attach to sample node
            # if the sample node was newly created
            if is_sample_new:
                Attribute.objects.create(
                    node=sample_node, type=Attribute.CHARACTERISTICS,
                    subtype=self.headers[column_index].strip().lower(),
                    value=row[column_index].strip())
    # Start remote file import tasks if `Make Import Permanent:` flag set
    # by the user
    # Likewise, we'll try to import these files if their source begins with
    # our REFINERY_DATA_IMPORT_DIR setting (This will be the case if
    # users upload datafiles associated with their metadata)
    for uuid in data_files:
        try:
            file_store_item = FileStoreItem.objects.get(uuid=uuid)
        except (FileStoreItem.DoesNotExist,
                FileStoreItem.MultipleObjectsReturned) as e:
            # skip this file but keep importing the rest
            logger.error("Couldn't properly fetch FileStoreItem %s", e)
        else:
            if (self.file_permanent or file_store_item.source.startswith(
                    (settings.REFINERY_DATA_IMPORT_DIR, 's3://'))):
                import_file.delay(uuid)
    return investigation
def add_igv_samples(fields, results_samp, annot_samples=None):
    """Create a phenotype (sample information) file for IGV, import it
    into the file store and return the stored file's full URL.

    :param fields: Solr field names; "_Characteristics_" fields become
        phenotype columns
    :type fields: Array.
    :param results_samp: Solr results for samples to be included
    :type results_samp: Array.
    :param annot_samples: includes annotation files included with solr
        results
    :type annot_samples: Array
    :returns: full URL of the stored sample information file
    """
    def write_lines(output_file, lines):
        # Write sample lines, falling back to explicit UTF-8 encoding when
        # the default codec cannot handle the content. Applied to both the
        # sample and annotation writes so they fail (or succeed) the same
        # way.
        try:
            output_file.write(lines)
        except UnicodeEncodeError as e:
            logger.error(
                "Could not write results to file: %s. "
                "Trying again with the content to write encoded "
                "properly.", e
            )
            output_file.write(lines.encode("utf-8"))

    # creates human readable indexes of fields to iterate over
    fields_dict = {}
    for i in fields:
        find_index = i.find("_Characteristics_")
        if find_index > -1:
            new_key = i.split("_Characteristics_")[0]
            fields_dict[i] = new_key
    # Creating temp file to enter into file_store
    temp_sample_name = tempfile.NamedTemporaryFile(delete=False)
    # writing header to sample file
    temp_sample_name.write("#sampleTable" + "\n")
    # writing column names to sample file
    col_names = "Linking_id"
    for k, v in fields_dict.iteritems():
        col_names = col_names + "\t" + v
    temp_sample_name.write(col_names + "\n")
    # iterating over sample files
    pheno_results = get_sample_lines(fields_dict, results_samp)
    write_lines(temp_sample_name, pheno_results)
    # if annotations are not null
    if annot_samples:
        pheno_annot = get_sample_lines(fields_dict, annot_samples)
        write_lines(temp_sample_name, pheno_annot)
    # closing temp file
    temp_sample_name.close()
    # getting file_store_uuid
    filestore_uuid = create(temp_sample_name.name, filetype="txt")
    filestore_item = import_file(filestore_uuid, refresh=True)
    # rename stored file (by way of file_store) to carry a .txt extension
    temp_file = filestore_item.datafile.name.split("/")
    temp_file = temp_file[len(temp_file) - 1] + ".txt"
    filestore_item = rename(filestore_uuid, temp_file)
    # getting file information based on file_uuids
    curr_fs = FileStoreItem.objects.get(uuid=filestore_uuid)
    # full path to selected UUID File
    curr_url = get_full_url(curr_fs.get_datafile_url())
    # delete temp file
    os.unlink(temp_sample_name.name)
    return curr_url