Esempio n. 1
0
 def init_meta( self, dataset, copy_from=None ):
     """Run Tabular's metadata initialisation, then record this datatype's column labels.

     dataset and copy_from are passed straight through to Tabular.init_meta.
     """
     Tabular.init_meta( self, dataset, copy_from=copy_from )
     # One label per column, in column order.
     self.column_names = [
         'DB',
         'DB Object ID',
         'DB Object Symbol',
         'Qualifier',
         'GO ID',
         'DB:Reference (|DB:Reference)',
         'Evidence Code',
         'With (or) From',
         'Aspect',
         'DB Object',
         'DB Object Synonym (|Synonym)',
         'DB Object Type',
         'Taxon(|taxon)',
         'Date',
         'Assigned By',
         'Annotation Extension',
         'Gene Product Form ID',
     ]
Esempio n. 2
0
 def set_meta( self, dataset, overwrite = True, **kwd ):
     """Sets the metadata information for datasets previously determined to be in bed format.

     Scans the file for the first non-empty, non-comment line that has more
     than two tab-separated fields and starts (case-insensitively) with one
     of data.col1_startswith.  From that line it derives nameCol (4 when a
     fourth field exists) and strandCol (6 when at least six fields exist,
     otherwise 0).  Existing values are kept unless overwrite is true.
     Finally Tabular.set_meta is called with skip set to the index of the
     line where scanning stopped.  Nothing happens for datasets without data.
     """
     i = 0
     if dataset.has_data():
         # The context manager closes the handle even when a metadata setter raises.
         with open( dataset.file_name ) as fh:
             for i, line in enumerate( fh ):
                 line = line.rstrip( '\r\n' )
                 if not line or line.startswith( '#' ):
                     continue
                 elems = line.split( '\t' )
                 if len( elems ) <= 2:
                     continue
                 lowered = line.lower()
                 if not any( lowered.startswith( prefix ) for prefix in data.col1_startswith ):
                     continue
                 if len( elems ) > 3:
                     if overwrite or not dataset.metadata.element_is_set( 'nameCol' ):
                         dataset.metadata.nameCol = 4
                 # Fewer than six fields means there is no strand column.
                 strand_col = 0 if len( elems ) < 6 else 6
                 if overwrite or not dataset.metadata.element_is_set( 'strandCol' ):
                     dataset.metadata.strandCol = strand_col
                 break  # metadata is set; i now indexes this line
         Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i )
Esempio n. 3
0
 def set_meta( self, dataset, overwrite = True, **kwd ):
     """Locate the first plausible data line, then delegate to Tabular.set_meta.

     A line qualifies when it is non-empty, does not start with '#', has
     exactly nine tab-separated fields, its 4th and 5th fields are each an
     integer or '.', its 7th field is in self.valid_gff3_strand and its 8th
     field is in self.valid_gff3_phase.  When both coordinates are integers
     the start must be smaller than the end; when either is '.', the
     ordering check is skipped.

     The index of the qualifying line (or of the last line read when none
     qualifies, 0 for an empty file) is passed to Tabular.set_meta as skip.
     """
     i = 0
     with open( dataset.file_name ) as fh:
         for i, line in enumerate( fh ):
             line = line.rstrip( '\r\n' )
             if not line or line.startswith( '#' ):
                 continue
             elems = line.split( '\t' )
             if len( elems ) != 9:
                 continue
             # None marks a '.' placeholder.  Resetting per line prevents a
             # value parsed from an earlier line being compared by accident.
             start = end = None
             try:
                 start = int( elems[3] )
             except ValueError:
                 if elems[3] != '.':
                     continue
             try:
                 end = int( elems[4] )
             except ValueError:
                 if elems[4] != '.':
                     continue
             if start is not None and end is not None and not start < end:
                 continue
             strand = elems[6]
             phase = elems[7]
             if strand in self.valid_gff3_strand and phase in self.valid_gff3_phase:
                 break
     Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i )
 def __init__(self, **kwd):
     """Set up the CG_Var datatype and its fixed list of column labels."""
     Tabular.__init__( self, **kwd )
     # One label per column, in column order.
     self.column_names = [
         'locus',
         'ploidy',
         'allele',
         'chromosome',
         'begin',
         'end',
         'varType',
         'reference',
         'alleleSeq',
         'varScoreVAF',
         'varScoreEAF',
         'varQuality',
         'hapLink',
         'xRef',
     ]
Esempio n. 5
0
 def __init__(self, **kwd):
     """Set up the gg datatype and register its UCSC display app."""
     Tabular.__init__(self, **kwd)
     self.add_display_app(
         'ucsc',
         'Genome Graph',
         'as_ucsc_display_file',
         'ucsc_links',
     )
Esempio n. 6
0
 def __init__(self, **kwd):
     """Initialise the Tabular base and set this datatype's column labels."""
     Tabular.__init__(self, **kwd)
     # NOTE(review): 'Probabaility' in the last label looks like a typo; it is
     # kept verbatim because the label is a runtime string.
     self.column_names = [
         'Protein',
         'Peptide',
         'Assumed Charge',
         'Neutral Pep Mass (calculated)',
         'Neutral Mass',
         'Retention Time',
         'Start Scan',
         'End Scan',
         'Search Engine',
         'PeptideProphet Probability',
         'Interprophet Probabaility',
     ]
Esempio n. 7
0
 def __init__(self, **kwd):
     """Initialise the Tabular base, then set the 13 column labels and the column count."""
     Tabular.__init__(self, **kwd)
     self.column_names = [
         'ID',
         'local_max',
         'min_count_aTIS',
         'R_aTis',
         'min_count_5UTR',
         'R_5UTR',
         'min_count_CDS',
         'R_CDS',
         'min_count_3UTR',
         'R_3UTR',
         'min_count_no_trans',
         'R_no_trans',
         'SNP',
     ]
     # Matches the number of labels above.
     self.columns = 13
Esempio n. 8
0
 def set_meta(self, dataset, **kwd):
     """Set tabular metadata, then override column info from the first line.

     After Tabular.set_meta has run, markerCol is set to 1.  The first line
     of the file is split on tabs: its field count becomes metadata.columns,
     the first column is typed 'string' and every other column 'numeric'.

     An empty file has no first line; the values computed by
     Tabular.set_meta are then left untouched instead of raising IndexError.

     Always returns True.
     """
     Tabular.set_meta(self, dataset, **kwd)
     dataset.metadata.markerCol = 1
     # Only the first line is needed, so read just that and close the handle.
     with open(dataset.file_name, 'r') as fh:
         first_line = fh.readline()
     if not first_line:
         return True
     header = first_line.strip().split('\t')
     dataset.metadata.columns = len(header)
     dataset.metadata.column_types = ['string'] + ['numeric'] * (len(header) - 1)
     return True
Esempio n. 9
0
 def set_meta(self, dataset, **kwd):
     """Set tabular metadata, then override column info from the first line.

     After Tabular.set_meta has run, markerCol is set to 1.  The first line
     of the file is split on tabs: its field count becomes metadata.columns,
     the first column is typed 'string' and every other column 'numeric'.

     An empty file has no first line; the values computed by
     Tabular.set_meta are then left untouched instead of raising IndexError.

     Always returns True.
     """
     Tabular.set_meta( self, dataset, **kwd)
     dataset.metadata.markerCol = 1
     # open() replaces the Python 2-only file() builtin; only the first line
     # is read and the handle is closed deterministically.
     with open(dataset.file_name, 'r') as fh:
         first_line = fh.readline()
     if not first_line:
         return True
     header = first_line.strip().split('\t')
     dataset.metadata.columns = len(header)
     dataset.metadata.column_types = ['string'] + ['numeric'] * (len(header) - 1)
     return True
 def __init__(self, **kwd):
     """Set up the CG_Gene datatype and its fixed list of column labels."""
     Tabular.__init__( self, **kwd )
     # One label per column, in column order.
     self.column_names = [
         'index',
         'locus',
         'allele',
         'chromosome',
         'begin',
         'end',
         'varType',
         'reference',
         'call',
         'xRef',
         'geneId',
         'mrnaAcc',
         'proteinAcc',
         'symbol',
         'orientation',
         'component',
         'componentIndex',
         'hasCodingRegion',
         'impact',
         'nucleotidePos',
         'proteinPos',
         'annotationRefSequence',
         'sampleSequence',
         'genomeRefSequence',
         'pfam',
     ]
Esempio n. 11
0
 def set_meta(self, dataset, overwrite=True, **kwd):
     """Set tabular metadata, then fill in comment metadata if it is still unset.

     Tabular.set_meta is called with max_data_lines=None and
     max_guess_type_data_lines=1000.  When metadata.comment_metadata is None
     afterwards, it is populated from a copy of
     DatasetCommentMetadata(dataset).comment_metadata and
     set_dataset_metadata_from_comments is invoked.
     """
     Tabular.set_meta(self, dataset, overwrite=overwrite, max_data_lines=None,
                      max_guess_type_data_lines=1000, **kwd)
     if dataset.metadata.comment_metadata is not None:
         return
     parsed_comments = DatasetCommentMetadata(dataset)
     dataset.metadata.comment_metadata = parsed_comments.comment_metadata.copy()
     self.set_dataset_metadata_from_comments(dataset)
Esempio n. 12
0
 def __init__(self, **kwd):
     """Initialise the Tabular base and set this datatype's column labels."""
     Tabular.__init__(self, **kwd)
     # One label per column, in column order.
     self.column_names = [
         "Entry Number",
         "Group Probability",
         "Protein",
         "Protein Link",
         "Protein Probability",
         "Percent Coverage",
         "Number of Unique Peptides",
         "Total Independent Spectra",
         "Percent Share of Spectrum ID's",
         "Description",
         "Protein Molecular Weight",
         "Protein Length",
         "Is Nondegenerate Evidence",
         "Weight",
         "Precursor Ion Charge",
         "Peptide sequence",
         "Peptide Link",
         "NSP Adjusted Probability",
         "Initial Probability",
         "Number of Total Termini",
         "Number of Sibling Peptides Bin",
         "Number of Instances",
         "Peptide Group Designator",
         "Is Evidence?",
     ]
Esempio n. 13
0
 def __init__(self, **kwd):
     """Initialise the Tabular base and set this datatype's column labels."""
     Tabular.__init__(self, **kwd)
     # One label per column, in column order.
     self.column_names = [
         "Entry Number",
         "Group Probability",
         "Protein",
         "Protein Link",
         "Protein Probability",
         "Percent Coverage",
         "Number of Unique Peptides",
         "Total Independent Spectra",
         "Percent Share of Spectrum ID's",
         "Description",
         "Protein Molecular Weight",
         "Protein Length",
         "Is Nondegenerate Evidence",
         "Weight",
         "Precursor Ion Charge",
         "Peptide sequence",
         "Peptide Link",
         "NSP Adjusted Probability",
         "Initial Probability",
         "Number of Total Termini",
         "Number of Sibling Peptides Bin",
         "Number of Instances",
         "Peptide Group Designator",
         "Is Evidence?",
     ]
Esempio n. 14
0
 def set_meta( self, dataset, overwrite = True, **kwd ):
     """Locate the first plausible data line, then delegate to Tabular.set_meta.

     A line qualifies when it is non-empty, does not start with '#', has
     exactly nine tab-separated fields, and its 4th and 5th fields both
     parse as integers.  The index of that line (or of the last line read
     when none qualifies, 0 for an empty file) is passed to
     Tabular.set_meta as skip.
     """
     i = 0
     with open( dataset.file_name ) as fh:
         for i, line in enumerate( fh ):
             line = line.rstrip( '\r\n' )
             if not line or line.startswith( '#' ):
                 continue
             elems = line.split( '\t' )
             if len( elems ) != 9:
                 continue
             try:
                 int( elems[3] )
                 int( elems[4] )
             except ValueError:
                 # Not numeric coordinates: keep scanning.
                 continue
             break
     Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i )
 def __init__(self, **kwd):
     """Initialize CG_MasterVar datatype and its fixed list of column labels."""
     Tabular.__init__( self, **kwd )
     # 'allele2Gene' and 'pfam' are separate columns; they were previously
     # fused into a single tab-containing string, which shifted every label
     # after them by one position.
     self.column_names = ['locus', 'ploidy', 'chromosome', 'begin', 'end', 'zygosity',
                          'varType', 'reference', 'allele1Seq', 'allele2Seq',
                          'allele1VarScoreVAF', 'allele2VarScoreVAF', 'allele1VarScoreEAF',
                          'allele2VarScoreEAF', 'allele1VarQuality', 'allele2VarQuality',
                          'allele1HapLink', 'allele2HapLink', 'allele1XRef', 'allele2XRef',
                          'evidenceIntervalId', 'allele1ReadCount', 'allele2ReadCount',
                          'referenceAlleleRead', 'totalReadCount', 'allele1Gene',
                          'allele2Gene', 'pfam', 'miRBaseId', 'repeatMasker', 'segDupOverlap',
                          'relativeCoverageDiploid', 'calledPloidy',
                          'relativeCoverageNondiploid', 'calledLevel'
                          ]
Esempio n. 16
0
def test_tabular_set_meta_large_file():
    """Smoke test: Tabular.set_meta runs on a file with MAX_DATA_LINES + 1 rows."""
    line_count = MAX_DATA_LINES + 1
    with tempfile.NamedTemporaryFile(mode='w') as test_file:
        # Write one more two-column row than MAX_DATA_LINES.
        test_file.writelines("A\tB\n" for _ in range(line_count))
        test_file.flush()
        dataset = MockDataset(id=1)
        dataset.file_name = test_file.name
        Tabular().set_meta(dataset)
Esempio n. 17
0
    def set_meta(self, dataset, overwrite=True, skip=None, max_data_lines=None, **kwd):
        """Set tabular metadata, then store the distinct first-column values.

        Tabular.set_meta is called with dataset, overwrite, skip and
        max_data_lines passed positionally; extra keyword arguments are
        accepted but not forwarded.  Every line of the file is then
        stripped and split on tabs, and the set of first fields is stored
        as a list in dataset.metadata.args (element order is that of set
        iteration).
        """
        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
        # str.split always yields at least one field, so indexing [0] cannot
        # fail.  The context manager also avoids the NameError the previous
        # try/finally raised when open() itself failed.
        with open(dataset.file_name) as fh:
            tis_args = {line.strip().split("\t")[0] for line in fh}
        dataset.metadata.args = list(tis_args)
Esempio n. 18
0
 def __init__(self, **kwd):
     """Initialise the Tabular base, then set the 13 column labels and the column count."""
     Tabular.__init__(self, **kwd)
     self.column_names = [
         "ID", "local_max",
         "min_count_aTIS", "R_aTis",
         "min_count_5UTR", "R_5UTR",
         "min_count_CDS", "R_CDS",
         "min_count_3UTR", "R_3UTR",
         "min_count_no_trans", "R_no_trans",
         "SNP",
     ]
     # Matches the number of labels above.
     self.columns = 13
Esempio n. 19
0
 def set_meta( self, dataset, overwrite = True, **kwd ):
     """Locate the first data line, then delegate to Tabular.set_meta.

     Scanning stops at the first non-empty, non-comment line whose first
     tab-separated field either parses as a float or starts
     (case-insensitively) with one of data.col1_startswith.  The index of
     that line (or of the last line read when none matches, 0 for an empty
     file) is passed to Tabular.set_meta as skip.
     """
     i = 0
     with open( dataset.file_name ) as fh:
         for i, line in enumerate( fh ):
             line = line.rstrip( '\r\n' )
             if not line or line.startswith( '#' ):
                 continue
             first = line.split( '\t' )[0]
             try:
                 float( first ) #"Wiggle track data values can be integer or real, positive or negative values"
                 break
             except ValueError:
                 # Not a number: accept the line if it starts with a known prefix.
                 lowered = first.lower()
                 if any( lowered.startswith( prefix ) for prefix in data.col1_startswith ):
                     break
     Tabular.set_meta( self, dataset, overwrite = overwrite, skip = i )
Esempio n. 20
0
 def init_meta(self, dataset, copy_from=None):
     """Run Tabular's metadata initialisation, then record this datatype's column labels.

     dataset and copy_from are passed straight through to Tabular.init_meta.
     """
     Tabular.init_meta(self, dataset, copy_from=copy_from)
     # One label per column, in column order.
     self.column_names = [
         'DB', 'DB Object ID', 'DB Object Symbol', 'Qualifier', 'GO ID',
         'DB:Reference (|DB:Reference)', 'Evidence Code', 'With (or) From',
         'Aspect', 'DB Object', 'DB Object Synonym (|Synonym)',
         'DB Object Type', 'Taxon(|taxon)', 'Date', 'Assigned By',
         'Annotation Extension', 'Gene Product Form ID',
     ]
Esempio n. 21
0
    def set_meta(self,
                 dataset,
                 overwrite=True,
                 skip=None,
                 max_data_lines=None,
                 **kwd):
        """Set tabular metadata, then store the distinct first-column values.

        Tabular.set_meta is called with dataset, overwrite, skip and
        max_data_lines passed positionally; extra keyword arguments are
        accepted but not forwarded.  Every line of the file is then
        stripped and split on tabs, and the set of first fields is stored
        as a list in dataset.metadata.args (element order is that of set
        iteration).
        """
        Tabular.set_meta(self, dataset, overwrite, skip, max_data_lines)
        # str.split always yields at least one field, so indexing [0] cannot
        # fail.  The context manager also avoids the NameError the previous
        # try/finally raised when open() itself failed.
        with open(dataset.file_name) as fh:
            tis_args = {line.strip().split('\t')[0] for line in fh}
        dataset.metadata.args = list(tis_args)
Esempio n. 22
0
 def __init__(self, **kwd):
     """Initialize RsemResults datatype"""
     # The docstring must be the first statement; placed after the base
     # call it was only a discarded string expression.
     Tabular.__init__(self, **kwd)
     self.comment_lines = 1
 def set_peek(self, dataset):
     """Set the peek via Tabular, then append the dimension to the blurb.

     The blurb is left alone when the underlying dataset has been purged.
     """
     Tabular.set_peek(self, dataset)
     if dataset.dataset.purged:
         return
     dimension = dataset.metadata.dimension
     dataset.blurb += f' dim: {dimension!s}'
Esempio n. 24
0
 def set_meta( self, dataset, overwrite = True, **kwd ):
     """Delegate to Tabular.set_meta with skip=1.

     Extra keyword arguments are accepted but not forwarded.
     """
     Tabular.set_meta(
         self,
         dataset,
         overwrite = overwrite,
         skip = 1,
     )
 def set_meta( self, dataset, **kwd ):
     """Delegate unchanged to Tabular.set_meta, forwarding all keyword arguments."""
     Tabular.set_meta(
         self,
         dataset,
         **kwd
     )
Esempio n. 26
0
 def __init__(self, **kwd):
     """Set up the interval datatype and register its UCSC display app."""
     Tabular.__init__(self, **kwd)
     self.add_display_app(
         'ucsc',
         'display at UCSC',
         'as_ucsc_display_file',
         'ucsc_links',
     )
Esempio n. 27
0
 def __init__(self, **kwd):
     """Set up the datatype and register its GBrowse display app."""
     Tabular.__init__(self, **kwd)
     self.add_display_app(
         'c_elegans',
         'display in Wormbase',
         'as_gbrowse_display_file',
         'gbrowse_links',
     )
Esempio n. 28
0
 def __init__(self, **kwd):
     """Set up the feature list datatype with an empty list of column labels."""
     Tabular.__init__(self, **kwd)
     self.column_names = list()
Esempio n. 29
0
 def set_meta( self, dataset, overwrite = True, first_line_is_header = False, **kwd ):
     """Tries to guess from the line the location number of the column for the chromosome, region start-end and strand

     Tabular.set_meta is run first with skip=0.  The file is then read line
     by line, ignoring empty lines:

     * A header line (first_line_is_header is true, or the line starts with
       '#') resets the metadata through init_meta and maps each column name
       found in alias_helper to its 1-based position.  Scanning stops after
       this line.
     * Any other line is used for a best guess when it has more than two
       tab-separated fields and starts (case-insensitively) with one of
       data.col1_startswith: chromCol=1, startCol=2 / endCol=3 when those
       fields are integers, nameCol=4 when a fourth field exists and is not
       an integer, strandCol=6 when the sixth field is in data.valid_strand
       and 0 otherwise.  Scanning stops after the first such line.

     Existing metadata values are kept unless overwrite is true.
     """
     Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 0 )
     if not dataset.has_data():
         return
     metadata = dataset.metadata
     with open( dataset.file_name ) as fh:
         for line in fh:
             line = line.rstrip( '\r\n' )
             if not line:
                 continue
             if first_line_is_header or line[0] == '#':
                 self.init_meta( dataset )
                 elems = line.strip( '#' ).split( '\t' )
                 valid = dict( alias_helper ) # shrinks
                 for index, col_name in enumerate( elems ):
                     if col_name not in valid:
                         continue
                     meta_name = valid[ col_name ]
                     if overwrite or not metadata.element_is_set( meta_name ):
                         setattr( metadata, meta_name, index + 1 )
                     values = alias_spec[ meta_name ]
                     # Remove this alias and all lower priority ones.  pop()
                     # tolerates aliases that an earlier column already removed.
                     for lower in values[ values.index( col_name ): ]:
                         valid.pop( lower, None )
                 break  # Our metadata is set, so stop reading
             # Header lines in Interval files are optional. For example, BED is Interval but has no header.
             # We'll make a best guess at the location of the metadata columns.
             elems = line.split( '\t' )
             if len( elems ) <= 2:
                 continue
             lowered = line.lower()
             if not any( lowered.startswith( prefix ) for prefix in data.col1_startswith ):
                 continue
             if overwrite or not metadata.element_is_set( 'chromCol' ):
                 metadata.chromCol = 1
             try:
                 int( elems[1] )
             except ValueError:
                 pass # Metadata default will be used
             else:
                 if overwrite or not metadata.element_is_set( 'startCol' ):
                     metadata.startCol = 2
             try:
                 int( elems[2] )
             except ValueError:
                 pass # Metadata default will be used
             else:
                 if overwrite or not metadata.element_is_set( 'endCol' ):
                     metadata.endCol = 3
             if len( elems ) > 3:
                 try:
                     int( elems[3] )
                 except ValueError:
                     # A non-numeric fourth field is taken to be the name.
                     if overwrite or not metadata.element_is_set( 'nameCol' ):
                         metadata.nameCol = 4
             strand_col = 6 if len( elems ) >= 6 and elems[5] in data.valid_strand else 0
             if overwrite or not metadata.element_is_set( 'strandCol' ):
                 metadata.strandCol = strand_col
             break # Our metadata is set, so stop reading
Esempio n. 30
0
 def init_meta(self, dataset, copy_from=None):
     """Delegate metadata initialisation to Tabular.init_meta."""
     Tabular.init_meta(
         self,
         dataset,
         copy_from=copy_from,
     )
Esempio n. 31
0
 def display_peek(self, dataset):
     """Return the result of Tabular.make_html_table using this datatype's column names."""
     labels = self.column_names
     return Tabular.make_html_table(self, dataset, column_names=labels)
Esempio n. 32
0
 def display_peek(self, dataset):
     """Return the formatted HTML peek, built with this datatype's column names."""
     labels = self.column_names
     return Tabular.make_html_table(self, dataset, column_names=labels)
Esempio n. 33
0
 def __init__( self, **kwd ):
     """Initialise the Tabular base and register the 'ucsc' and 'gbrowse' display apps."""
     Tabular.__init__( self, **kwd )
     # Registered in this order: ucsc first, then gbrowse.
     display_apps = (
         ( 'ucsc', 'display at UCSC', 'as_ucsc_display_file', 'ucsc_links' ),
         ( 'gbrowse', 'display in Gbrowse', 'as_gbrowse_display_file', 'gbrowse_links' ),
     )
     for app_args in display_apps:
         self.add_display_app( *app_args )
 def __init__(self, **kwd):
     """Initialise the Tabular base and set this datatype's column labels."""
     Tabular.__init__( self, **kwd )
     # NOTE(review): 'Probabaility' in the last label looks like a typo; it is
     # kept verbatim because the label is a runtime string.
     self.column_names = [
         'Protein',
         'Peptide',
         'Assumed Charge',
         'Neutral Pep Mass (calculated)',
         'Neutral Mass',
         'Retention Time',
         'Start Scan',
         'End Scan',
         'Search Engine',
         'PeptideProphet Probability',
         'Interprophet Probabaility',
     ]
Esempio n. 35
0
 def init_meta( self, dataset, copy_from=None ):
     """Delegate metadata initialisation to Tabular.init_meta."""
     Tabular.init_meta(
         self,
         dataset,
         copy_from=copy_from,
     )
Esempio n. 36
0
 def display_peek( self, dataset ):
     """Return the result of Tabular.make_html_table using this datatype's column names."""
     labels = self.column_names
     return Tabular.make_html_table( self, dataset, column_names=labels )
 def set_meta(self, dataset, **kwd):
     """Set tabular metadata, then compute the dimension for datasets that have data.

     The file is opened with decoding errors ignored and handed to
     self._get_dimension; the result is stored in metadata.dimension.
     """
     Tabular.set_meta(self, dataset, **kwd)
     if not dataset.has_data():
         return
     with open(dataset.file_name, errors='ignore') as handle:
         dimension = self._get_dimension(handle)
         dataset.metadata.dimension = dimension
Esempio n. 38
0
 def set_meta(self, dataset, **kwd):
     """Delegate to Tabular.set_meta with skip=None, forwarding all other keyword arguments."""
     Tabular.set_meta(
         self,
         dataset,
         skip=None,
         **kwd
     )
Esempio n. 39
0
 def set_meta( self, dataset, overwrite = True, **kwd ):
     """Set tabular metadata, then fill in comment metadata if it is still unset.

     Tabular.set_meta is called with max_data_lines=None and
     max_guess_type_data_lines=1000.  When metadata.comment_metadata is None
     afterwards, it is populated from a copy of
     DatasetCommentMetadata( dataset ).comment_metadata and
     set_dataset_metadata_from_comments is invoked.
     """
     Tabular.set_meta(
         self,
         dataset,
         overwrite=overwrite,
         max_data_lines=None,
         max_guess_type_data_lines=1000,
         **kwd
     )
     if dataset.metadata.comment_metadata is not None:
         return
     parsed_comments = DatasetCommentMetadata( dataset )
     dataset.metadata.comment_metadata = parsed_comments.comment_metadata.copy()
     self.set_dataset_metadata_from_comments( dataset )
Esempio n. 40
0
 def set_meta( self, dataset, overwrite = True, first_line_is_header = False, **kwd ):
     """Tries to guess from the line the location number of the column for the chromosome, region start-end and strand

     Tabular.set_meta is run first with skip=0.  The file is then read line
     by line, counting and ignoring empty lines:

     * A header line (first_line_is_header is true, or the line starts with
       '#') resets the metadata through init_meta and maps each column name
       found in alias_helper to its 1-based position.  Scanning stops after
       this line.
     * Any other line is used for a best guess when it has more than two
       tab-separated fields and starts (case-insensitively) with one of
       data.col1_startswith: chromCol=1, startCol=2 / endCol=3 when those
       fields are integers, strandCol=6 when the sixth field is in
       data.valid_strand and 0 otherwise.  Scanning stops after the first
       such line, or once the index of the current line minus the number of
       empty lines seen exceeds num_check_lines.

     Existing metadata values are kept unless overwrite is true.
     """
     Tabular.set_meta( self, dataset, overwrite = overwrite, skip = 0 )
     if not dataset.has_data():
         return
     metadata = dataset.metadata
     empty_line_count = 0
     num_check_lines = 100 # only check up to this many non empty lines
     with open( dataset.file_name ) as fh:
         for i, line in enumerate( fh ):
             line = line.rstrip( '\r\n' )
             if not line:
                 empty_line_count += 1
                 continue
             if first_line_is_header or line[0] == '#':
                 self.init_meta( dataset )
                 elems = line.strip( '#' ).split( '\t' )
                 valid = dict( alias_helper ) # shrinks
                 for index, col_name in enumerate( elems ):
                     if col_name not in valid:
                         continue
                     meta_name = valid[ col_name ]
                     if overwrite or not metadata.element_is_set( meta_name ):
                         setattr( metadata, meta_name, index + 1 )
                     values = alias_spec[ meta_name ]
                     # Remove this alias and all lower priority ones.  pop()
                     # tolerates aliases that an earlier column already removed.
                     for lower in values[ values.index( col_name ): ]:
                         valid.pop( lower, None )
                 break  # Our metadata is set, so stop reading
             # Header lines in Interval files are optional. For example, BED is Interval but has no header.
             # We'll make a best guess at the location of the metadata columns.
             metadata_is_set = False
             elems = line.split( '\t' )
             lowered = line.lower()
             if len( elems ) > 2 and any( lowered.startswith( prefix ) for prefix in data.col1_startswith ):
                 if overwrite or not metadata.element_is_set( 'chromCol' ):
                     metadata.chromCol = 1
                 try:
                     int( elems[1] )
                 except ValueError:
                     pass # Metadata default will be used
                 else:
                     if overwrite or not metadata.element_is_set( 'startCol' ):
                         metadata.startCol = 2
                 try:
                     int( elems[2] )
                 except ValueError:
                     pass # Metadata default will be used
                 else:
                     if overwrite or not metadata.element_is_set( 'endCol' ):
                         metadata.endCol = 3
                 # The name column is deliberately not guessed; it must be set manually for interval files.
                 # The strand is still guessed, as a more educated guess is possible.
                 strand_col = 6 if len( elems ) >= 6 and elems[5] in data.valid_strand else 0
                 if overwrite or not metadata.element_is_set( 'strandCol' ):
                     metadata.strandCol = strand_col
                 metadata_is_set = True
             if metadata_is_set or ( i - empty_line_count ) > num_check_lines:
                 break # Our metadata is set or we examined 100 non-empty lines, so stop reading
Esempio n. 41
0
 def display_peek( self, dataset ):
     """Return the formatted HTML peek, passing 'track' and '#' as skipchars."""
     skipped_prefixes = ['track', '#']
     return Tabular.make_html_table( self, dataset, skipchars=skipped_prefixes )
Esempio n. 42
0
 def __init__(self, **kwd):
     """Initialise the Tabular base, then set the 12 column labels and the column count."""
     Tabular.__init__( self, **kwd )
     self.column_names = [
         'ID',
         'local_max',
         'min_count_aTIS',
         'R_aTis',
         'min_count_5UTR',
         'R_5UTR',
         'min_count_CDS',
         'R_CDS',
         'min_count_3UTR',
         'R_3UTR',
         'min_count_no_trans',
         'R_no_trans',
     ]
     # Matches the number of labels above.
     self.columns = 12
Esempio n. 43
0
 def set_readonly_meta( self, dataset, skip=1, **kwd ):
     """Reset the read-only metadata elements through Tabular, with skip defaulting to 1."""
     Tabular.set_readonly_meta(
         self,
         dataset,
         skip = skip,
         **kwd
     )
Esempio n. 44
0
 def display_peek( self, dataset ):
     """Return the formatted HTML peek, built with this datatype's column names."""
     labels = self.column_names
     return Tabular.make_html_table( self, dataset, column_names=labels )
Esempio n. 45
0
 def make_html_table( self, dataset ):
     """Return Tabular's HTML table for the dataset, passing 'track' and '#' as skipchars."""
     skipped_prefixes = ['track', '#']
     return Tabular.make_html_table( self, dataset, skipchars=skipped_prefixes )
Esempio n. 46
0
 def __init__(self, **kwd):
     """Initialise the datatype by delegating to Tabular.__init__."""
     Tabular.__init__(
         self,
         **kwd
     )
Esempio n. 47
0
 def __init__(self, **kwd):
     """Set up the gg datatype and register its UCSC display app."""
     Tabular.__init__(self, **kwd)
     self.add_display_app(
         'ucsc',
         'Genome Graph',
         'as_ucsc_display_file',
         'ucsc_links',
     )
 def set_meta(self, dataset, **kwd):
     """Delegate unchanged to Tabular.set_meta, forwarding all keyword arguments."""
     Tabular.set_meta(
         self,
         dataset,
         **kwd
     )
Esempio n. 49
0
 def __init__(self, **kwd):
     """Set up the feature list datatype with an empty list of column labels."""
     Tabular.__init__(self, **kwd)
     self.column_names = list()
Esempio n. 50
0
 def display_peek(self, dataset):
     """Return the formatted HTML peek, passing '#' as the only skipchars entry."""
     skipped_prefixes = ['#']
     return Tabular.make_html_table(self, dataset, skipchars=skipped_prefixes)
Esempio n. 51
0
 def __init__(self, **kwd):
     """Initialise the Tabular base and set the three column labels."""
     Tabular.__init__(self, **kwd)
     self.column_names = [
         'Column',
         'Name',
         'Alias',
     ]
Esempio n. 52
0
 def __init__(self, **kwd):
     """Set up the QTLMap:Genealogy datatype by delegating to Tabular.__init__."""
     Tabular.__init__(
         self,
         **kwd
     )