Python ExpressionBuilder.cleanUpLine Examples

Programming Language: Python

Method/Function: cleanUpLine

Examples at hotexamples.com: 2

Python ExpressionBuilder.cleanUpLine - 2 examples found. These are the top rated real world Python examples of ExpressionBuilder.cleanUpLine from package manycore_form_compiler extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

simplerGroupImport(4)

importTranscriptBiotypeAnnotations(2)

cleanUpLine(2)

checkExpressionFileFormat(1)

importAltMerge(1)

importArrayGroups(1)

importComparisonGroups(1)

importGeneAnnotations(1)

logTransformWithNAs(1)

simpleGroupImport(1)

Example #1

Show file

File: NormalizeDataset.py Project: warrenmcg/altanalyze

def importGroups(fn):
    try: group_db=collections.OrderedDict()
    except Exception:
        try:
            import ordereddict
            group_db=ordereddict.OrderedDict()
        except Exception: group_db={}
    for line in open(fn,'rU').xreadlines():
        data = ExpressionBuilder.cleanUpLine(line)
        sample_filename,group_number,group_name = string.split(data,'\t')
        try: group_db[group_name].append(sample_filename)
        except Exception: group_db[group_name] = [sample_filename]
    return group_db

Example #2

Show file

File: NormalizeDataset.py Project: warrenmcg/altanalyze

def performGroupNormalization(filename,export_dir,platform):
    expressionDataFormat,increment,convertNonLogToLog = ExpressionBuilder.checkExpressionFileFormat(filename)
    groups_dir = string.replace(export_dir,'exp.','batch.')
    fn=unique.filepath(filename); row_number=0; exp_db={}; relative_headers_exported = False
    group_db = importGroups(groups_dir)
    export_data = export.ExportFile(export_dir)
    for line in open(fn,'rU').xreadlines():
        data = ExpressionBuilder.cleanUpLine(line)
        t = string.split(data,'\t')
        if data[0]=='#' and row_number==0: row_number = 0
        elif row_number==0:
            sample_list = t[1:]
            new_sample_list = []
            for group in group_db:
                group_samples = group_db[group]
                try:
                    sample_index_list = map(lambda x: sample_list.index(x), group_samples)
                    group_db[group] = sample_index_list
                    new_sample_list+=group_samples
                except Exception:
                    missing=[]
                    for x in sample_list:
                        if x not in t[1:]: missing.append(x)
                    print 'missing:',missing
                    print t
                    print sample_list
                    print filename, groups_dir
                    print 'Unknown Error!!! Skipping cluster input file build (check column and row formats for conflicts)'; forceExit
            title = string.join([t[0]]+new_sample_list,'\t')+'\n' ### output the new sample order (group file order)
            export_data.write(title)
            row_number=1
        else:
            gene = t[0]
            if expressionDataFormat == 'non-log' and (convertNonLogToLog or platform == 'RNASeq'):
                ### Convert to log2 RPKM values - or counts
    
                try: all_values = map(lambda x: math.log(float(x)+increment,2), t[1:])
                except Exception:
                    all_values = ExpressionBuilder.logTransformWithNAs(t[1:],increment)
            else:
                try: all_values = map(float,t[1:])
                except Exception:
                    all_values = ExpressionBuilder.logTransformWithNAs(t[1:],increment)
            row_number+=1 ### Keep track of the first gene as to write out column headers for the relative outputs
            gene_log_folds = []

            for group in group_db:
                sample_index_list = group_db[group]
                ### Calculate log-fold values relative to the mean of all sample expression values
                try: values = map(lambda x: all_values[x], sample_index_list) ### simple and fast way to reorganize the samples
                except Exception:
                    print len(values), sample_index_list;kill
                try: avg = statistics.avg(values)
                except Exception:
                    values2=[]
                    for v in values:
                        try: values2.append(float(v))
                        except Exception: pass
                    values = values2
                    try: avg = statistics.avg(values)
                    except Exception:
                        if len(values)>0: avg = values[0]
                        else: avg = 0
                try: log_folds = map(lambda x: (x-avg), values)
                except Exception: 
                    log_folds=[]
                    for x in values:
                        try: log_folds.append(x-avg)
                        except Exception: log_folds.append('')
                gene_log_folds+=log_folds                            
            gene_log_folds = map(lambda x: str(x),gene_log_folds)
            export_data.write(string.join([gene]+gene_log_folds,'\t')+'\n')
    export_data.close()