Example #1
def copy_file(orig_uuid):
    """Helper function that copies a file if given the original file's UUID
    :param orig_uuid: UUID of file to copy.
    :type orig_uuid: str.
    :returns: UUID of newly copied file.
    """
    orig_fsi = read(orig_uuid)
    newfile_uuid = None
    try:
        newfile_uuid = create(orig_fsi.source, orig_fsi.sharename, orig_fsi.filetype, permanent=is_permanent(orig_uuid))
        import_file(newfile_uuid, refresh=True)
    except AttributeError:
        pass

    return newfile_uuid
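A minimal usage sketch for this helper, assuming the file_store functions it relies on (read, create, import_file, is_permanent) are importable as above; the UUID value is a placeholder:

# Hypothetical usage; assumes the file_store helpers used by copy_file
# (read, create, import_file, is_permanent) are available in this module.
orig_uuid = "11111111-2222-3333-4444-555555555555"  # placeholder UUID
new_uuid = copy_file(orig_uuid)
if new_uuid is None:
    # read() returned an object missing the expected attributes, so
    # create() raised AttributeError and the copy was skipped
    print("copy failed for %s" % orig_uuid)
else:
    print("copied %s -> %s" % (orig_uuid, new_uuid))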
Example #2
def addIGVSamples(fields, results_samp, annot_samples=None):
    """creates phenotype file for IGV
    :param samples: Solr results for samples to be included
    :type samples: Array.
    :param annot_samples: includes annotation files included with solr results
    :type annot_samples: Array
    """
    # creates human readable indexes of fields to iterate over
    fields_dict = {}
    for i in fields:
        find_index = i.find("_Characteristics_")
        if find_index > -1:
            new_key = i.split("_Characteristics_")[0]
            fields_dict[i] = new_key

    # Creating temp file to enter into file_store
    tempsampname = tempfile.NamedTemporaryFile(delete=False)

    # writing header to sample file
    tempsampname.write("#sampleTable" + "\n")

    # writing column names to sample file
    col_names = "Linking_id"
    for k, v in fields_dict.iteritems():
        col_names = col_names + '\t' + v
    tempsampname.write(col_names + "\n")

    # iterating over sample files
    pheno_results = get_sample_lines(fields_dict, results_samp)
    tempsampname.write(pheno_results)

    # if annotations are not null
    if annot_samples:
        pheno_annot = get_sample_lines(fields_dict, annot_samples)
        tempsampname.write(pheno_annot)

    # closing temp file
    tempsampname.close()

    # getting file_store_uuid
    filestore_uuid = create(tempsampname.name, permanent=True, filetype="txt")
    filestore_item = import_file(filestore_uuid, permanent=True, refresh=True)

    # file to rename
    temp_file = filestore_item.datafile.name.split('/')
    temp_file = temp_file[len(temp_file) - 1] + '.txt'

    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_file)

    # getting file information based on file_uuids
    curr_fs = FileStoreItem.objects.get(uuid=filestore_uuid)

    # full path to selected UUID File
    curr_url = get_full_url(curr_fs.get_datafile_url())

    # delete temp file
    os.unlink(tempsampname.name)

    return curr_url
Example #3
def copy_file(original_item_uuid):
    """Creates a copy of a FileStoreItem with the given UUID"""
    try:
        original_item = FileStoreItem.objects.get(uuid=original_item_uuid)
    except (FileStoreItem.DoesNotExist,
            FileStoreItem.MultipleObjectsReturned) as exc:
        logger.error("Failed to copy FileStoreItem with UUID '%s': %s",
                     original_item_uuid, exc)
        return None
    try:
        new_item = FileStoreItem.objects.create(
            source=original_item.source, filetype=original_item.filetype)
    except AttributeError:
        return None
    else:
        import_file(new_item.uuid, refresh=True)
        return new_item.uuid
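For contrast with the helper-based versions, a sketch of how this ORM variant's error path could be exercised in a Django test; the test name is invented, and the FileStoreItem model plus a test database are assumed:

# Hedged test sketch; relies on Django's TestCase and the FileStoreItem
# model from the surrounding application.
from django.test import TestCase

class CopyFileTests(TestCase):
    def test_returns_none_for_unknown_uuid(self):
        # no FileStoreItem with this UUID exists, so the DoesNotExist
        # branch should log an error and return None
        self.assertIsNone(copy_file("00000000-0000-0000-0000-000000000000"))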
Example #4
def copy_file(orig_uuid):
    """Helper function that copies a file if given the original file's UUID
    :param orig_uuid: UUID of file to copy.
    :type orig_uuid: str.
    :returns: UUID of newly copied file.
    """
    orig_fsi = read(orig_uuid)
    newfile_uuid = None
    try:
        newfile_uuid = create(
            orig_fsi.source, orig_fsi.sharename, orig_fsi.filetype,
            permanent=is_permanent(orig_uuid)
        )
        import_file(newfile_uuid, refresh=True)
    except AttributeError:
        pass

    return newfile_uuid
Example #5
def copy_file(orig_uuid):
    """Helper function that copies a file if given the original file's UUID
    :param orig_uuid: UUID of file to copy.
    :type orig_uuid: str.
    :returns: UUID of newly copied file.
    """
    newfile_uuid = None
    try:
        orig_fsi = FileStoreItem.objects.get(uuid=orig_uuid)
    except (FileStoreItem.DoesNotExist,
            FileStoreItem.MultipleObjectsReturned) as e:
        logger.error("Couldn't properly fetch FileStoreItem: %s", e)
    else:
        try:
            newfile_uuid = create(orig_fsi.source,
                                  orig_fsi.sharename,
                                  orig_fsi.filetype,
                                  permanent=is_permanent(orig_uuid))
            import_file(newfile_uuid, refresh=True)
        except AttributeError:
            pass

    return newfile_uuid
Example #6
    def run(self, path, isa_archive=None, preisa_archive=None):
        """If path is a file it will be treated as an ISArchive, if it is a
        directory it will be treated as an extracted ISArchive. Assumes that
        the archive extracts into a subdirectory named <archive> if the
        ISArchive is called <archive>.zip.
        """
        # reset all variables
        self._current_investigation = None
        self._current_study = None
        self._current_assay = None
        self._current_node = None
        self._previous_node = None
        self._current_attribute = None
        self._current_protocol_reference = None
        self._current_reader = None
        self._current_file = None
        self._current_file_name = None
        # 1. test if archive needs to be extracted and extract if necessary
        if not os.path.isdir(path):
            # assign to isa_archive if it's an archive anyway
            isa_archive = path
            logger.info(
                "Supplied path \"" + path + "\" is not a directory. Assuming "
                "ISArchive file.")
            try:
                # TODO: do we need a random subdirectory here?
                extract_path = tempfile.mkdtemp()
                with ZipFile(path, 'r') as zip_file:
                    # test if any paths are relative or absolute and outside
                    # the extract path
                    for name in zip_file.namelist():
                        if name.startswith("..") or name.startswith("/"):
                            logger.error(
                                "Unable to extract assumed ISArchive file \"" +
                                path + "\" due to illegal file path: " + name
                            )
                    # extract archive
                    zip_file.extractall(extract_path)
                    first_file = zip_file.namelist()[0]
                    # test if first entry in zip file is a path
                    if first_file.endswith("/"):
                        # add archive subdirectory to path
                        extract_path = os.path.join(extract_path, first_file)
                    elif re.search(r'/', first_file):
                        ind = first_file.find('/')
                        extract_path = os.path.join(
                            extract_path,
                            first_file[:ind]
                        )

                    logger.info(
                        "ISArchive extracted to \"" + extract_path + "\"."
                    )
                    path = extract_path
            except Exception:
                logger.exception(
                    "Unable to extract assumed ISArchive file \"" + path +
                    "\".")
        # 2. identify investigation file
        try:
            investigation_file_name = glob.glob("%s/i*.txt" % path).pop()
        except IndexError as exception:
            logger.exception(
                "Unable to identify an investigation file in \"" +
                path + "\".")
            raise exception
        # 3. parse investigation file and identify study files and
        # corresponding assay files
        self._parse_investigation_file(investigation_file_name)
        # 4. parse all study files and corresponding assay files
        if self._current_investigation is not None:
            # identify studies associated with this investigation
            for study in self._current_investigation.study_set.all():
                # parse study file
                self._current_assay = None
                study_file_name = os.path.join(path, study.file_name)
                if data_set_manager.tasks.fix_last_col(study_file_name):
                    self._parse_study_file(study, study_file_name)
                    for assay in study.assay_set.all():
                        # parse assay file
                        self._previous_node = None
                        assay_file_name = os.path.join(path, assay.file_name)
                        if data_set_manager.tasks.fix_last_col(
                                assay_file_name):
                            self._parse_assay_file(
                                study,
                                assay,
                                assay_file_name)
        else:
            logger.error(
                "No investigation was identified when parsing investigation "
                "file \"" + investigation_file_name + "\"")
            raise Exception("no investigation identified")
        # 5. assign ISA-Tab archive and pre-ISA-Tab archive if present
        try:
            self._current_investigation.isarchive_file = create(isa_archive)
            import_file(self._current_investigation.isarchive_file,
                        refresh=True)
        except Exception:
            pass

        if preisa_archive:
            self._current_investigation.pre_isarchive_file = \
                create(preisa_archive)
            import_file(self._current_investigation.pre_isarchive_file,
                        refresh=True)

        self._current_investigation.save()
        return self._current_investigation
Example #7
    def run(self, path, isa_archive=None, preisa_archive=None):
        """If path is a file it will be treated as an ISArchive, if it is a
        directory it will be treated as an extracted ISArchive. Assumes that
        the archive extracts into a subdirectory named <archive> if the
        ISArchive is called <archive>.zip.
        """
        # reset all variables
        self._current_investigation = None
        self._current_study = None
        self._current_assay = None
        self._current_node = None
        self._previous_node = None
        self._current_attribute = None
        self._current_protocol_reference = None
        self._current_reader = None
        self._current_file = None
        self._current_file_name = None
        # 1. test if archive needs to be extracted and extract if necessary
        if not os.path.isdir(path):
            # assign to isa_archive if it's an archive anyway
            isa_archive = path
            logger.info("Supplied path \"" + path +
                        "\" is not a directory. Assuming "
                        "ISArchive file.")
            try:
                # TODO: do we need a random subdirectory here?
                extract_path = tempfile.mkdtemp()
                with ZipFile(path, 'r') as zip_file:
                    # test if any paths are relative or absolute and outside
                    # the extract path
                    for name in zip_file.namelist():
                        if name.startswith("..") or name.startswith("/"):
                            logger.error(
                                "Unable to extract assumed ISArchive file \"" +
                                path + "\" due to illegal file path: " + name)
                    # extract archive
                    zip_file.extractall(extract_path)
                    first_file = zip_file.namelist()[0]
                    # test if first entry in zip file is a path
                    if first_file.endswith("/"):
                        # add archive subdirectory to path
                        extract_path = os.path.join(extract_path, first_file)
                    elif re.search(r'/', first_file):
                        ind = first_file.find('/')
                        extract_path = os.path.join(extract_path,
                                                    first_file[:ind])

                    logger.info("ISArchive extracted to \"" + extract_path +
                                "\".")
                    path = extract_path
            except Exception:
                logger.exception(
                    "Unable to extract assumed ISArchive file \"" + path +
                    "\".")
        # 2. identify investigation file
        try:
            investigation_file_name = glob.glob("%s/i*.txt" % path).pop()
        except IndexError as exception:
            logger.exception("Unable to identify ISArchive file in \"" + path +
                             "\".")
            raise exception
        # 3. parse investigation file and identify study files and
        # corresponding assay files
        self._parse_investigation_file(investigation_file_name)
        # 4. parse all study files and corresponding assay files
        if self._current_investigation is not None:
            # identify studies associated with this investigation
            for study in self._current_investigation.study_set.all():
                # parse study file
                self._current_assay = None
                study_file_name = os.path.join(path, study.file_name)
                if data_set_manager.tasks.fix_last_col(study_file_name):
                    self._parse_study_file(study, study_file_name)
                    for assay in study.assay_set.all():
                        # parse assay file
                        self._previous_node = None
                        assay_file_name = os.path.join(path, assay.file_name)
                        if data_set_manager.tasks.fix_last_col(
                                assay_file_name):
                            self._parse_assay_file(study, assay,
                                                   assay_file_name)
        else:
            logger.error(
                "No investigation was identified when parsing investigation "
                "file \"" + investigation_file_name + "\"")
            raise Exception("no investigation identified")
        # 5. assign ISA-Tab archive and pre-ISA-Tab archive if present
        try:
            self._current_investigation.isarchive_file = create(isa_archive)
            import_file(self._current_investigation.isarchive_file,
                        refresh=True)
        except Exception:
            pass

        if preisa_archive:
            self._current_investigation.pre_isarchive_file = \
                create(preisa_archive)
            import_file(self._current_investigation.pre_isarchive_file,
                        refresh=True)

        self._current_investigation.save()
        return self._current_investigation
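Note that both run() variants above only log illegal member paths before calling extractall(), so a hostile archive is still extracted. A stricter pre-check could abort instead; a sketch under the same ZipFile import (raising is an assumption, not the original behavior):

# Hedged sketch of a zip-slip guard; the code above merely logs
# offending names and extracts the archive regardless.
from zipfile import ZipFile

def assert_safe_archive(archive_path):
    with ZipFile(archive_path, 'r') as zip_file:
        for name in zip_file.namelist():
            # reject absolute paths and parent-directory traversal
            if name.startswith("/") or ".." in name.split("/"):
                raise ValueError("illegal member path: " + name)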
Example #8
def create_igv_session(genome, uuids, is_file_uuid=False):
    """ Creates session file for selected file uuids, returns newly created
    filestore uuid
    :param is_file_uuid:
    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: Array of UUIDs to be used
    :type uuids: array.
    :param uuids: Host URL i.e. 127.0.0.1:8000
    :type uuids: string
    """
    # Create IGV Session file and put into Filestore
    """
    http://www.postneo.com/projects/pyxml/

    <?xml version="1.0" encoding="UTF-8"?>
        <Global genome="hg18" locus="EGFR" version="3">
        <Resources>
            <Resource name="RNA Genes"
            path="http://www.broadinstitute.org/igvdata/tcga/gbm/GBM_batch1-8_level3_exp.txt.recentered.080820.gct.tdf"/>
            <Resource name="RNA Genes"
            path="http://www.broadinstitute.org/igvdata/annotations/hg18/rna_genes.bed"/>
            <Resource name="sno/miRNA"
            path="http://www.broadinstitute.org/igvdata/tcga/gbm/Sample_info.txt"/>
        </Resources>
    </Global>
    """
    logger.debug("visualization_manager.create_igv_session called")

    # Create the minidom document
    doc = Document()
    # Create the <wml> base element
    xml = doc.createElement("Global")
    xml.setAttribute("genome", genome)
    xml.setAttribute("locus", "All")
    xml.setAttribute("version", "4")
    doc.appendChild(xml)
    # Add Resources
    xml_resources = doc.createElement("Resources")
    xml.appendChild(xml_resources)
    # get paths to url
    for samp in uuids:
        # gets filestore item
        curr_name, curr_url = get_file_name(samp, is_file_uuid=is_file_uuid)

        logger.debug('New resource: ' + curr_name + ' - ' + curr_url)

        # What to do if fs does not exist?
        if curr_name:
            # creates Resource element
            res = doc.createElement("Resource")
            res.setAttribute("name", curr_name)
            res.setAttribute("path", curr_url)
            xml_resources.appendChild(res)
    # Creating temp file to enter into file_store
    tempfilename = tempfile.NamedTemporaryFile(delete=False)
    tempfilename.write(doc.toprettyxml(indent="  "))
    tempfilename.close()
    # getting file_store_uuid
    filestore_uuid = create(tempfilename.name, filetype="xml")
    filestore_item = import_file(filestore_uuid, refresh=True)
    # file to rename
    temp_name = filestore_item.datafile.name.split('/')
    temp_name = temp_name[len(temp_name) - 1] + '.xml'
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_name)
    # delete temp file
    os.unlink(tempfilename.name)
    # Url for session file
    fs_url = get_full_url(filestore_item.get_datafile_url())
    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php" \
              "?sessionURL=" + fs_url
    return igv_url
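A hedged usage sketch; the genome build and UUID below are placeholders and must be resolvable by get_file_name() for Resource elements to be added:

# Illustrative call with placeholder arguments.
igv_url = create_igv_session(
    "hg18",
    ["11111111-2222-3333-4444-555555555555"],
    is_file_uuid=False)
# igv_url points at the Broad IGV launcher with sessionURL set to the
# newly stored session XML file.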
Example #9
def add_igv_samples(fields, results_samp, annot_samples=None):
    """creates phenotype file for IGV
    :param samples: Solr results for samples to be included
    :type samples: Array.
    :param annot_samples: includes annotation files included with solr results
    :type annot_samples: Array
    """
    # creates human readable indexes of fields to iterate over
    fields_dict = {}
    for i in fields:
        find_index = i.find("_Characteristics_")
        if find_index > -1:
            new_key = i.split("_Characteristics_")[0]
            fields_dict[i] = new_key

    # Creating temp file to enter into file_store
    temp_sample_name = tempfile.NamedTemporaryFile(delete=False)

    # writing header to sample file
    temp_sample_name.write("#sampleTable" + "\n")

    # writing column names to sample file
    col_names = "Linking_id"
    for k, v in fields_dict.iteritems():
        col_names = col_names + '\t' + v
    temp_sample_name.write(col_names + "\n")

    # iterating over sample files
    pheno_results = get_sample_lines(fields_dict, results_samp)
    try:
        temp_sample_name.write(pheno_results)
    except UnicodeEncodeError as e:
        logger.error("Could not write results to file: %s. "
                     "Trying again with the content to write encoded "
                     "properly." % e)
        temp_sample_name.write(pheno_results.encode("utf-8"))

    # if annotations are not null
    if annot_samples:
        pheno_annot = get_sample_lines(fields_dict, annot_samples)
        temp_sample_name.write(pheno_annot)

    # closing temp file
    temp_sample_name.close()

    # getting file_store_uuid
    filestore_uuid = create(temp_sample_name.name, filetype="txt")
    filestore_item = import_file(filestore_uuid, refresh=True)

    # file to rename
    temp_file = filestore_item.datafile.name.split('/')
    temp_file = temp_file[len(temp_file) - 1] + '.txt'

    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_file)

    # getting file information based on file_uuids
    curr_fs = FileStoreItem.objects.get(uuid=filestore_uuid)

    # full path to selected UUID File
    curr_url = get_full_url(curr_fs.get_datafile_url())

    # delete temp file
    os.unlink(temp_sample_name.name)

    return curr_url
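For reference, the phenotype file written above has roughly this shape; the columns are tab-separated, the values are illustrative, and the column set depends on which *_Characteristics_* fields were present:

#sampleTable
Linking_id	organism	cell_line
sample_1	Homo sapiens	HeLa
sample_2	Homo sapiens	K562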
Example #10
def create_igv_session_annot(genome, uuids, annot_uuids=None, samp_file=None):
    """Creates session file for selected file uuids, returns newly created
    filestore uuid
    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: Array of UUIDs to be used
    :type uuids: array.
    :param uuids: Host URL i.e. 127.0.0.1:8000
    :type uuids: string
    """
    # Create IGV Session file and put into Filestore
    """
    http://www.postneo.com/projects/pyxml/

    <?xml version="1.0" encoding="UTF-8"?>
        <Global genome="hg18" locus="EGFR" version="3">
        <Resources>
            <Resource name="RNA Genes"
            path="http://www.broadinstitute.org/igvdata/tcga/gbm/GBM_batch1-8_level3_exp.txt.recentered.080820.gct.tdf"/>
            <Resource name="RNA Genes"
            path="http://www.broadinstitute.org/igvdata/annotations/hg18/rna_genes.bed"/>
            <Resource name="sno/miRNA"
            path="http://www.broadinstitute.org/igvdata/tcga/gbm/Sample_info.txt"/>
        </Resources>
    </Global>
    """
    # Create the minidom document
    doc = Document()
    # Create the <wml> base element
    xml = doc.createElement("Global")
    xml.setAttribute("genome", genome)
    xml.setAttribute("locus", "All")
    xml.setAttribute("version", "4")
    doc.appendChild(xml)
    # Add Resources
    xml_resources = doc.createElement("Resources")
    xml.appendChild(xml_resources)
    # adding selected samples to xml file
    add_igv_resource(uuids["node_uuid"], xml_resources, doc)
    if annot_uuids:
        # adding selected samples to xml file
        add_igv_resource(annot_uuids["node_uuid"], xml_resources, doc)
    # adds sample information file to IGV session file
    if samp_file:
        # <Resource name="Sample Information"
        # path="http://igv.broadinstitute.org/data/hg18/tcga/gbm/gbmsubtypes/sampleTable.txt.gz"/>
        # creates Resource element
        res = doc.createElement("Resource")
        res.setAttribute("name", "Sample Information")
        res.setAttribute("path", samp_file)
        xml_resources.appendChild(res)
    # <HiddenAttributes>
    #    <Attribute name="DATA FILE"/>
    #    <Attribute name="Linking_id"/>
    #    <Attribute name="DATA TYPE"/>
    # </HiddenAttributes>
    # Adding parameters to hide basic unnecessary sample info
    hidden_attr = doc.createElement("HiddenAttributes")
    xml.appendChild(hidden_attr)

    attr = doc.createElement("Attribute")
    attr.setAttribute("name", "DATA FILE")
    hidden_attr.appendChild(attr)

    attr = doc.createElement("Attribute")
    attr.setAttribute("name", "Linking_id")
    hidden_attr.appendChild(attr)

    attr = doc.createElement("Attribute")
    attr.setAttribute("name", "DATA TYPE")
    hidden_attr.appendChild(attr)

    # Creating temp file to enter into file_store
    tempfilename = tempfile.NamedTemporaryFile(delete=False)
    tempfilename.write(doc.toprettyxml(indent="  "))
    tempfilename.close()

    # getting file_store_uuid
    filestore_uuid = create(tempfilename.name, filetype="xml")
    filestore_item = import_file(filestore_uuid, refresh=True)

    # file to rename
    temp_name = filestore_item.datafile.name.split('/')
    temp_name = temp_name[len(temp_name) - 1] + '.xml'

    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_name)

    # delete temp file
    os.unlink(tempfilename.name)

    # Url for session file
    sessionfile_url = get_full_url(filestore_item.get_datafile_url())

    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php" \
              "?sessionURL=" + sessionfile_url

    return igv_url
Example #11
    def _parse_file(self, file_name):
        try:
            self._current_file = open(file_name, "rU")
            self._current_reader = csv.reader(self._current_file,
                                              dialect="excel-tab",
                                              delimiter=self.delimiter)
        except IOError:
            logger.exception("Unable to read file " +
                             str(self._current_file) + ".")

        # create investigation, study and assay objects
        investigation = self._create_investigation()
        study = self._create_study(investigation=investigation,
                                   file_name=file_name)
        assay = self._create_assay(study=study, file_name=file_name)

        # import in file as "pre-isa" file
        logger.info('trying to add pre-isa archive file %s' % file_name)
        investigation.pre_isarchive_file = create(file_name, permanent=True)
        import_file(investigation.pre_isarchive_file, refresh=True,
                    permanent=True)
        investigation.save()

        # read column headers
        headers = self._current_reader.next()

        # compute absolute file_column_index
        # (in case a negative value was provided)
        if self.file_column_index >= 0:
            internal_file_column_index = self.file_column_index
        else:
            internal_file_column_index = len(headers) + self.file_column_index

        # compute absolute auxiliary_file_column_index
        # (in case a negative value was provided)
        if self.auxiliary_file_column_index is not None:
            if self.auxiliary_file_column_index >= 0:
                internal_auxiliary_file_column_index = \
                    self.auxiliary_file_column_index
            else:
                internal_auxiliary_file_column_index = \
                    len(headers) + self.auxiliary_file_column_index
        else:
            internal_auxiliary_file_column_index = None

        # TODO: test if there are fewer columns than required
        logger.debug("Parsing with file column %s and auxiliary file column "
                     "%s." % (internal_file_column_index,
                              internal_auxiliary_file_column_index))

        # iterate over non-header rows in file
        for row in self._current_reader:
            # TODO: resolve relative indices
            internal_source_column_index = self.source_column_index
            internal_sample_column_index = self.sample_column_index
            internal_assay_column_index = self.assay_column_index

            # add data file to file store
            if self.file_base_path is None:
                file_path = row[internal_file_column_index].strip()
            else:
                file_path = os.path.join(
                    self.file_base_path,
                    row[internal_file_column_index].strip())

            file_uuid = create(source=file_path,
                               permanent=self.file_permanent)

            if file_uuid is not None:
                logger.debug("Added data file " + file_path +
                             " to file store.")
            else:
                logger.error("Unable to add data file " + file_path +
                             " to file store.")

            # add auxiliary file to file store
            auxiliary_file_uuid = None

            if internal_auxiliary_file_column_index is not None:
                if self.file_base_path is None:
                    auxiliary_file_path = \
                        row[internal_auxiliary_file_column_index].strip()
                else:
                    auxiliary_file_path = os.path.join(
                        self.file_base_path,
                        row[internal_auxiliary_file_column_index].strip())

                auxiliary_file_uuid = create(source=auxiliary_file_path,
                                             permanent=self.file_permanent)

                if auxiliary_file_uuid is not None:
                    logger.debug("Added auxiliary file " +
                                 auxiliary_file_path + " to file store.")
                else:
                    logger.error("Unable to add auxiliary file " +
                                 auxiliary_file_path + " to file store.")

            # add files to file server
            file_server.models.add(file_uuid, auxiliary_file_uuid)

            # create nodes if file was successfully created

            # source node
            source_name = self._create_name(row, internal_source_column_index,
                                            internal_file_column_index)
            source_node, is_source_new = Node.objects.get_or_create(
                study=study,
                name=source_name,
                type=Node.SOURCE)

            # sample node
            sample_name = self._create_name(row, internal_sample_column_index,
                                            internal_file_column_index)
            sample_node, is_sample_new = Node.objects.get_or_create(
                study=study,
                name=sample_name,
                type=Node.SAMPLE)
            source_node.add_child(sample_node)

            # assay node
            assay_name = self._create_name(row, internal_assay_column_index,
                                           internal_file_column_index)
            assay_node, is_assay_new = Node.objects.get_or_create(
                study=study,
                assay=assay,
                name=assay_name,
                type=Node.ASSAY)
            sample_node.add_child(assay_node)

            file_node = Node.objects.create(
                study=study,
                assay=assay,
                name=row[internal_file_column_index].strip(),
                file_uuid=file_uuid,
                type=Node.RAW_DATA_FILE,
                species=self._get_species(row),
                genome_build=self._get_genome_build(row),
                is_annotation=self._is_annotation(row))
            assay_node.add_child(file_node)

            # iterate over columns to create attributes to attach to the
            # sample node
            for column_index in range(0, len(row)):
                # skip data file, auxiliary file and annotation columns
                if (internal_file_column_index == column_index or
                        internal_auxiliary_file_column_index == column_index or
                        self.annotation_column_index == column_index):
                    continue

                # create attribute as characteristic and attach to sample
                # node if the sample node was newly created
                if is_sample_new:
                    Attribute.objects.create(
                        node=sample_node,
                        type=Attribute.CHARACTERISTICS,
                        subtype=headers[column_index].strip().lower(),
                        value=row[column_index].strip())

        return investigation
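The relative-index handling above reduces to a small helper; a sketch with an invented name, mirroring the internal_*_column_index computations:

def _absolute_index(index, headers):
    # map a possibly-negative column index onto the header row
    if index is None:
        return None
    return index if index >= 0 else len(headers) + index

# e.g. _absolute_index(-1, ["name", "file"]) == 1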
Example #12
def createIGVsession(genome, uuids, is_file_uuid=False):
    """ Creates session file for selected file uuids, returns newly created filestore uuid 
    
    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: Array of UUIDs to be used
    :type uuids: array.
    :param uuids: Host URL i.e. 127.0.0.1:8000
    :type uuids: string
    """
    
    # Create IGV Session file and put into Filestore
    """
    http://www.postneo.com/projects/pyxml/
    
    <?xml version="1.0" encoding="UTF-8"?>
        <Global genome="hg18" locus="EGFR" version="3">
        <Resources>
            <Resource name="RNA Genes" path="http://www.broadinstitute.org/igvdata/tcga/gbm/GBM_batch1-8_level3_exp.txt.recentered.080820.gct.tdf"/>
            <Resource name="RNA Genes" path="http://www.broadinstitute.org/igvdata/annotations/hg18/rna_genes.bed"/>
            <Resource name="sno/miRNA" path="http://www.broadinstitute.org/igvdata/tcga/gbm/Sample_info.txt"/>
        </Resources>
    </Global>
    """
    logger.debug("visualization_manager.createIGVsession called")
    
    # Create the minidom document
    doc = Document()
    
    # Create the <wml> base element
    xml = doc.createElement("Global")
    xml.setAttribute("genome", genome)
    xml.setAttribute("locus", "All")
    xml.setAttribute("version", "4")
    doc.appendChild(xml)
    
    # Add Resources
    xml_resources = doc.createElement("Resources")
    xml.appendChild(xml_resources)
    
    # get paths to url 
    for samp in uuids:
        # gets filestore item 
        curr_name, curr_url = get_file_name(samp, is_file_uuid=is_file_uuid)

        logger.debug('New resource: ' + curr_name + ' - ' + curr_url)

        # What to do if fs does not exist?
        if curr_name:
            # creates Resource element
            res = doc.createElement("Resource")
            res.setAttribute("name", curr_name)
            res.setAttribute("path", curr_url)
            xml_resources.appendChild(res)
    
    # Creating temp file to enter into file_store
    tempfilename = tempfile.NamedTemporaryFile(delete=False)
    tempfilename.write(doc.toprettyxml(indent="  "))
    tempfilename.close()
    
    # getting file_store_uuid
    filestore_uuid = create(tempfilename.name, permanent=True, filetype="xml")
    filestore_item = import_file(filestore_uuid, permanent=True, refresh=True)
    
    # file to rename
    temp_name = filestore_item.datafile.name.split('/')
    temp_name = temp_name[len(temp_name)-1] + '.xml'
    
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_name)
    
    # delete temp file
    os.unlink(tempfilename.name)
    
    # Url for session file 
    fs_url = filestore_item.get_full_url()
    
    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php?sessionURL=" + fs_url
    
    return igv_url
Example #13
def addIGVSamples(fields, results_samp, annot_samples=None):
    """ creates phenotype file for IGV 
    
    :param samples: Solr results for samples to be included 
    :type samples: Array.
    :param annot_samples: includes annotation files included with solr results
    :type annot_samples: Array
    """
    
    # creates human readable indexes of fields to iterate over
    fields_dict = {}
    for i in fields:
        find_index = i.find("_Characteristics_")
        if find_index > -1:
            new_key = i.split("_Characteristics_")[0]
            fields_dict[i] = new_key
    
    # Creating temp file to enter into file_store
    tempsampname = tempfile.NamedTemporaryFile(delete=False)
    
    # writing header to sample file 
    tempsampname.write("#sampleTable" + "\n")
    
    # writing column names to sample file 
    col_names = "Linking_id"
    for k, v in fields_dict.iteritems():
        col_names = col_names + '\t' + v
    tempsampname.write(col_names + "\n")
    
    # iterating over sample files 
    pheno_results = get_sample_lines(fields_dict, results_samp)
    tempsampname.write(pheno_results)
    
    # if annotations are not null
    if annot_samples:
        #results_annot = annot_samples["response"]["docs"]
        pheno_annot = get_sample_lines(fields_dict, annot_samples)
        tempsampname.write(pheno_annot)
        
    # closing temp file 
    tempsampname.close()

    # getting file_store_uuid
    filestore_uuid = create(tempsampname.name, permanent=True, filetype="txt")
    filestore_item = import_file(filestore_uuid, permanent=True, refresh=True)
    
    # file to rename
    temp_file = filestore_item.datafile.name.split('/')
    temp_file = temp_file[len(temp_file)-1] + '.txt'
    
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_file)
    
    # getting file information based on file_uuids
    curr_fs = FileStoreItem.objects.get(uuid=filestore_uuid)
    curr_name = curr_fs.datafile.name
    
    # full path to selected UUID File
    curr_url = curr_fs.get_full_url()
    
    # delete temp file
    os.unlink(tempsampname.name)
    
    return curr_url
Example #14
def createIGVsessionAnnot(genome, uuids, annot_uuids=None, samp_file=None):
    """ Creates session file for selected file uuids, returns newly created filestore uuid 
    
    :param genome: Genome to be used in session file i.e. hg18, dm3
    :type genome: string.
    :param uuids: Array of UUIDs to be used
    :type uuids: array.
    :param uuids: Host URL i.e. 127.0.0.1:8000
    :type uuids: string
    """
    
    # Create IGV Session file and put into Filestore
    """
    http://www.postneo.com/projects/pyxml/
    
    <?xml version="1.0" encoding="UTF-8"?>
        <Global genome="hg18" locus="EGFR" version="3">
        <Resources>
            <Resource name="RNA Genes" path="http://www.broadinstitute.org/igvdata/tcga/gbm/GBM_batch1-8_level3_exp.txt.recentered.080820.gct.tdf"/>
            <Resource name="RNA Genes" path="http://www.broadinstitute.org/igvdata/annotations/hg18/rna_genes.bed"/>
            <Resource name="sno/miRNA" path="http://www.broadinstitute.org/igvdata/tcga/gbm/Sample_info.txt"/>
        </Resources>
    </Global>
    """
    
    # Create the minidom document
    doc = Document()
    
    # Create the <wml> base element
    xml = doc.createElement("Global")
    xml.setAttribute("genome", genome)
    xml.setAttribute("locus", "All")
    xml.setAttribute("version", "4")
    doc.appendChild(xml)
    
    # Add Resources
    xml_resources = doc.createElement("Resources")
    xml.appendChild(xml_resources)        
    
    # adding selected samples to xml file
    addIGVResource(uuids["node_uuid"], xml_resources, doc)
    
    if annot_uuids:
        # adding selected samples to xml file
        addIGVResource(annot_uuids["node_uuid"], xml_resources, doc)
        
        
    # adds sample information file to IGV session file 
    if samp_file:
        #<Resource name="Sample Information" path="http://igv.broadinstitute.org/data/hg18/tcga/gbm/gbmsubtypes/sampleTable.txt.gz"/>
        # creates Resource element 
        res = doc.createElement("Resource")
        res.setAttribute("name", "Sample Information")
        res.setAttribute("path", samp_file)
        xml_resources.appendChild(res)    
    
    #<HiddenAttributes>
    #    <Attribute name="DATA FILE"/>
    #    <Attribute name="Linking_id"/>
    #    <Attribute name="DATA TYPE"/>
    #</HiddenAttributes>
    # Adding parameters to hide basic unnecessary sample info
    hidden_attr = doc.createElement("HiddenAttributes")
    xml.appendChild(hidden_attr) 
    
    attr = doc.createElement("Attribute")
    attr.setAttribute("name", "DATA FILE")
    hidden_attr.appendChild(attr) 
    
    attr = doc.createElement("Attribute")
    attr.setAttribute("name", "Linking_id")
    hidden_attr.appendChild(attr) 
    
    attr = doc.createElement("Attribute")
    attr.setAttribute("name", "DATA TYPE")
    hidden_attr.appendChild(attr) 
    
    
    # Creating temp file to enter into file_store
    tempfilename = tempfile.NamedTemporaryFile(delete=False)
    tempfilename.write(doc.toprettyxml(indent="  "))
    tempfilename.close()
    
    # getting file_store_uuid
    filestore_uuid = create(tempfilename.name, permanent=True, filetype="xml")
    filestore_item = import_file(filestore_uuid, permanent=True, refresh=True)
    
    # file to rename
    temp_name = filestore_item.datafile.name.split('/')
    temp_name = temp_name[len(temp_name)-1] + '.xml'
    
    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_name)
    
    # delete temp file
    os.unlink(tempfilename.name)
    
    # Url for session file 
    fs_url = filestore_item.get_full_url()
    
    # IGV url for automatic launch of Java Webstart
    igv_url = "http://www.broadinstitute.org/igv/projects/current/igv.php?sessionURL=" + fs_url
    
    return igv_url
Example #15
    def run(self):
        # create investigation, study and assay objects
        investigation = self._create_investigation()
        # FIXME: self.metadata_file.name may not be informative, especially in
        # case of temp files that don't exist on disk
        study = self._create_study(investigation=investigation,
                                   file_name=self.metadata_file.name)
        assay = self._create_assay(study=study,
                                   file_name=self.metadata_file.name)

        # import in file as "pre-isa" file
        logger.info("trying to add pre-isa archive file %s",
                    self.metadata_file.name)
        # FIXME: this will not create a FileStoreItem if self.metadata_file
        # does not exist on disk (e.g., a file object like TemporaryFile)
        investigation.pre_isarchive_file = create(
            self.metadata_file.name, permanent=True)
        import_file(investigation.pre_isarchive_file, refresh=True)
        investigation.save()

        # TODO: test if there are fewer columns than required
        logger.debug("Parsing with file column %s and "
                     "auxiliary file column %s",
                     self.file_column_index, self.auxiliary_file_column_index)
        # UUIDs of data files to postpone importing until parsing is finished
        data_files = []
        # iterate over non-header rows in file
        for row in self.metadata_reader:
            # TODO: resolve relative indices
            internal_source_column_index = self.source_column_index
            internal_sample_column_index = self.sample_column_index
            internal_assay_column_index = self.assay_column_index
            # add data file to file store
            data_file_path = self.file_source_translator(
                row[self.file_column_index])
            data_file_uuid = create(
                source=data_file_path, permanent=self.file_permanent)
            data_files.append(data_file_uuid)
            # add auxiliary file to file store
            if self.auxiliary_file_column_index:
                auxiliary_file_path = self.file_source_translator(
                    row[self.auxiliary_file_column_index])
                auxiliary_file_uuid = create(
                    source=auxiliary_file_path, permanent=self.file_permanent)
                data_files.append(auxiliary_file_uuid)
            else:
                auxiliary_file_uuid = None
            # add files to file server
            # TODO: add error handling in case of None values for UUIDs
            file_server.models.add(data_file_uuid, auxiliary_file_uuid)
            # create nodes if file was successfully created
            # source node
            source_name = self._create_name(
                row, internal_source_column_index, self.file_column_index)
            source_node, is_source_new = Node.objects.get_or_create(
                study=study, name=source_name, type=Node.SOURCE)
            # sample node
            sample_name = self._create_name(
                row, internal_sample_column_index, self.file_column_index)
            sample_node, is_sample_new = Node.objects.get_or_create(
                study=study, name=sample_name, type=Node.SAMPLE)
            source_node.add_child(sample_node)
            # assay node
            assay_name = self._create_name(
                row, internal_assay_column_index, self.file_column_index)
            assay_node, is_assay_new = Node.objects.get_or_create(
                study=study, assay=assay, name=assay_name, type=Node.ASSAY)
            sample_node.add_child(assay_node)
            file_node = Node.objects.create(
                study=study, assay=assay,
                name=row[self.file_column_index].strip(),
                file_uuid=data_file_uuid, type=Node.RAW_DATA_FILE,
                species=self._get_species(row),
                genome_build=self._get_genome_build(row),
                is_annotation=self._is_annotation(row))
            assay_node.add_child(file_node)
            # iterate over columns to create attributes to attach to sample
            # node
            for column_index in range(0, len(row)):
                # skip data file column
                if (self.file_column_index == column_index or
                        self.auxiliary_file_column_index == column_index or
                        self.annotation_column_index == column_index):
                    continue
                # create attribute as characteristic and attach to sample node
                # if the sample node was newly created
                if is_sample_new:
                    Attribute.objects.create(
                        node=sample_node, type=Attribute.CHARACTERISTICS,
                        subtype=self.headers[column_index].strip().lower(),
                        value=row[column_index].strip()
                    )
        # kick off data file importing tasks
        for uuid in data_files:
            import_file.delay(uuid)
        return investigation
Example #16
    def run(self):
        # create investigation, study and assay objects
        investigation = self._create_investigation()
        # FIXME: self.metadata_file.name may not be informative, especially in
        # case of temp files that don't exist on disk
        study = self._create_study(investigation=investigation,
                                   file_name=self.metadata_file.name)
        assay = self._create_assay(study=study,
                                   file_name=self.metadata_file.name)

        # import in file as "pre-isa" file
        logger.info("trying to add pre-isa archive file %s",
                    self.metadata_file.name)
        # FIXME: this will not create a FileStoreItem if self.metadata_file
        # does not exist on disk (e.g., a file object like TemporaryFile)
        investigation.pre_isarchive_file = create(self.metadata_file.name)
        import_file(investigation.pre_isarchive_file, refresh=True)
        investigation.save()

        # TODO: test if there are fewer columns than required
        logger.debug(
            "Parsing with file column %s and "
            "auxiliary file column %s", self.file_column_index,
            self.auxiliary_file_column_index)
        # UUIDs of data files to postpone importing until parsing is finished
        data_files = []
        # iterate over non-header rows in file
        for row in self.metadata_reader:
            # TODO: resolve relative indices
            internal_source_column_index = self.source_column_index
            internal_sample_column_index = self.sample_column_index
            internal_assay_column_index = self.assay_column_index
            # add data file to file store
            data_file_path = self.file_source_translator(
                row[self.file_column_index])
            data_file_uuid = create(source=data_file_path)
            data_files.append(data_file_uuid)
            # add auxiliary file to file store
            if self.auxiliary_file_column_index:
                auxiliary_file_path = self.file_source_translator(
                    row[self.auxiliary_file_column_index])
                auxiliary_file_uuid = create(source=auxiliary_file_path)
                data_files.append(auxiliary_file_uuid)
            else:
                auxiliary_file_uuid = None
            # add files to file server
            # TODO: add error handling in case of None values for UUIDs
            file_server.models.add(data_file_uuid, auxiliary_file_uuid)
            # create nodes if file was successfully created
            # source node
            source_name = self._create_name(row, internal_source_column_index,
                                            self.file_column_index)
            source_node, is_source_new = Node.objects.get_or_create(
                study=study, name=source_name, type=Node.SOURCE)
            # sample node
            sample_name = self._create_name(row, internal_sample_column_index,
                                            self.file_column_index)
            sample_node, is_sample_new = Node.objects.get_or_create(
                study=study, name=sample_name, type=Node.SAMPLE)
            source_node.add_child(sample_node)
            # assay node
            assay_name = self._create_name(row, internal_assay_column_index,
                                           self.file_column_index)
            assay_node, is_assay_new = Node.objects.get_or_create(
                study=study, assay=assay, name=assay_name, type=Node.ASSAY)
            sample_node.add_child(assay_node)
            file_node = Node.objects.create(
                study=study,
                assay=assay,
                name=row[self.file_column_index].strip(),
                file_uuid=data_file_uuid,
                type=Node.RAW_DATA_FILE,
                species=self._get_species(row),
                genome_build=self._get_genome_build(row),
                is_annotation=self._is_annotation(row))
            assay_node.add_child(file_node)
            # iterate over columns to create attributes to attach to sample
            # node
            for column_index in range(0, len(row)):
                # skip data file column
                if (self.file_column_index == column_index
                        or self.auxiliary_file_column_index == column_index
                        or self.annotation_column_index == column_index):
                    continue
                # create attribute as characteristic and attach to sample node
                # if the sample node was newly created
                if is_sample_new:
                    Attribute.objects.create(
                        node=sample_node,
                        type=Attribute.CHARACTERISTICS,
                        subtype=self.headers[column_index].strip().lower(),
                        value=row[column_index].strip())

        # Start remote file import tasks if `Make Import Permanent:` flag set
        # by the user
        # Likewise, we'll try to import these files if their source begins with
        # our REFINERY_DATA_IMPORT_DIR setting (This will be the case if
        # users upload datafiles associated with their metadata)

        for uuid in data_files:
            try:
                file_store_item = FileStoreItem.objects.get(uuid=uuid)
            except (FileStoreItem.DoesNotExist,
                    FileStoreItem.MultipleObjectsReturned) as e:
                logger.error("Couldn't properly fetch FileStoreItem %s", e)
            else:
                if (self.file_permanent or file_store_item.source.startswith(
                    (settings.REFINERY_DATA_IMPORT_DIR, 's3://'))):
                    import_file.delay(uuid)

        return investigation
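Both run() variants collect data file UUIDs and only call import_file.delay() once parsing has finished, so a failed parse never leaves half-imported files behind. A minimal sketch of the same defer-then-dispatch pattern; parse_row() and import_file stand in for the real callables:

def parse_then_import(rows):
    pending = []
    for row in rows:
        uuid = parse_row(row)  # may raise; nothing dispatched yet
        pending.append(uuid)
    for uuid in pending:       # dispatch only after a complete parse
        import_file.delay(uuid)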
Example #17
def add_igv_samples(fields, results_samp, annot_samples=None):
    """creates phenotype file for IGV
    :param samples: Solr results for samples to be included
    :type samples: Array.
    :param annot_samples: includes annotation files included with solr results
    :type annot_samples: Array
    """
    # creates human readable indexes of fields to iterate over
    fields_dict = {}
    for i in fields:
        find_index = i.find("_Characteristics_")
        if find_index > -1:
            new_key = i.split("_Characteristics_")[0]
            fields_dict[i] = new_key

    # Creating temp file to enter into file_store
    temp_sample_name = tempfile.NamedTemporaryFile(delete=False)

    # writing header to sample file
    temp_sample_name.write("#sampleTable" + "\n")

    # writing column names to sample file
    col_names = "Linking_id"
    for k, v in fields_dict.iteritems():
        col_names = col_names + "\t" + v
    temp_sample_name.write(col_names + "\n")

    # iterating over sample files
    pheno_results = get_sample_lines(fields_dict, results_samp)
    try:
        temp_sample_name.write(pheno_results)
    except UnicodeEncodeError as e:
        logger.error("Could not write results to file: %s. "
                     "Trying again with the content encoded properly.", e)
        temp_sample_name.write(pheno_results.encode("utf-8"))

    # if annotations are not null
    if annot_samples:
        pheno_annot = get_sample_lines(fields_dict, annot_samples)
        temp_sample_name.write(pheno_annot)

    # closing temp file
    temp_sample_name.close()

    # getting file_store_uuid
    filestore_uuid = create(temp_sample_name.name, filetype="txt")
    filestore_item = import_file(filestore_uuid, refresh=True)

    # file to rename
    temp_file = filestore_item.datafile.name.split("/")
    temp_file = temp_file[len(temp_file) - 1] + ".txt"

    # rename file by way of file_store
    filestore_item = rename(filestore_uuid, temp_file)

    # getting file information based on file_uuids
    curr_fs = FileStoreItem.objects.get(uuid=filestore_uuid)

    # full path to selected UUID File
    curr_url = get_full_url(curr_fs.get_datafile_url())

    # delete temp file
    os.unlink(temp_sample_name.name)

    return curr_url