Beispiel #1
0
def sashmi_plot_list(bamdir,eventsToVisualizeFilename,PSIFilename,events=None):
    """Submit the requested splicing events to SashimiPlot in up to three passes.

    bamdir -- forwarded unchanged to formatAndSubmitSplicingEventsToSashimiPlot
    eventsToVisualizeFilename -- file the events are imported from when `events` is None
    PSIFilename -- value source for the first and the last pass
    events -- optional list of event IDs ('gene__...'); an entry whose leading field is a
              known gene symbol is replaced IN PLACE by the matching Ensembl gene ID

    Reads the module globals species, root_dir and steady_state_exp_file.
    Returns the gene -> symbol dictionary obtained from gene_associations.getGeneToUid.
    Raises NameError (message starts with 'forceNoCompatibleEventsInFile') when there is
    nothing to plot.
    """
    import gene_associations
    import OBO_import
    gene_to_symbol = gene_associations.getGeneToUid(species,('hide','Ensembl-Symbol'))
    symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)

    if events is None:
        splicing_events,expandedSearch = importSplicingEventsToVisualize(eventsToVisualizeFilename)
    else:
        expandedSearch = True
        for i, event in enumerate(events):
            gene = event.split('__')[0]
            if gene in gene_to_symbol:
                continue ### already a known gene ID, nothing to translate
            if ('ENS' not in gene or 'G0000' in gene) and gene in symbol_to_gene:
                ### translate this ID to an Ensembl gene ID for proper SashimiPlot lookup
                events[i] = symbol_to_gene[gene][0]
        splicing_events = events ### optionally get from supplied variable

    if len(splicing_events)==0:
        print(eventsToVisualizeFilename)
        ### The original forced this crash by evaluating an undefined name; the exception
        ### type and the marker text are kept so existing handlers keep working
        raise NameError('forceNoCompatibleEventsInFile: no splicing events available to plot')

    ### Under Python 2 the trailing comma keeps later output on the same line
    print('Exporting plots'),

    ### Determine Groups for Coloring
    groups_file = None
    for entry in unique.read_directory(root_dir+'/ExpressionInput'):
        if 'groups.' in entry:
            groups_file = root_dir+'/ExpressionInput/'+entry ### the last match wins

    ### Defaults used when no groups file exists or it cannot be imported. Previously the
    ### failure path left sample_group_db unbound and crashed on the first submit call.
    sample_group_db = {}
    groups = ['None']
    if groups_file is not None:
        try:
            import ExpressionBuilder
            imported_groups = ExpressionBuilder.simplerGroupImport(groups_file)
            ordered_groups=[]
            for sample in imported_groups:
                if imported_groups[sample] not in ordered_groups:
                    ordered_groups.append(imported_groups[sample]) ### unique groups, first-seen order
            sample_group_db = imported_groups
            groups = ordered_groups
        except Exception:
            pass ### best effort: continue with the ungrouped defaults

    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
    mopup_events = getMopUpEvents(splicing_events, processed_events)

    ### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(steady_state_exp_file,bamdir,mopup_events,sample_group_db,groups,expandedSearch)
    if len(processed_events)>0:
        mopup_events = getMopUpEvents(mopup_events, processed_events)
        processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
    return gene_to_symbol
Beispiel #2
0
def matrixImport(filename):
    """Read a tab-delimited matrix and bucket every row's values by sample group.

    The first line is the header. When it contains ':' each sample column must be
    labelled 'group:sample'; otherwise the sample -> group mapping is imported from a
    'groups.' file in the ExpressionInput directory derived from `filename`.

    Returns (matrix, original_data):
      matrix -- row ID -> list (one entry per group, in header order) of value lists;
                a cell equal to '0' is stored as '' (missing value), cells that are not
                numeric or are absent from the row are skipped
      original_data -- 'header' and every row ID -> the raw input line (newline-terminated
                for data rows)

    Raises IOError when the groups file is needed but none is found, and KeyError when a
    header sample is absent from the imported groups.
    """
    matrix={}
    original_data={}
    group_db={} ### group -> column indexes into a full row (column 0 holds the row ID)
    groups=[] ### group names in order of first appearance

    try: source = open(filename,'rU')
    except ValueError: source = open(filename,'r') ### the 'U' flag no longer exists in Python 3.11+

    with source: ### guarantees the handle is closed, also on errors
        headerRow=True
        for line in source:
            data = line.rstrip()
            values = data.split('\t')
            if headerRow:
                headerRow = False
                original_data['header'] = line
                if ':' in data:
                    for index, label in enumerate(values[1:], 1):
                        g, _sample = label.split(':')
                        group_db.setdefault(g, []).append(index)
                        if g not in groups: groups.append(g)
                else:
                    import ExpressionBuilder
                    search_dir = filename.split('AltResults')[0]+'ExpressionInput'
                    sample_group_db = None
                    for entry in unique.read_directory(search_dir):
                        if 'groups.' in entry:
                            sample_group_db = ExpressionBuilder.simplerGroupImport(search_dir+'/'+entry)
                    sample_names = values[1:]
                    if sample_group_db is None and sample_names:
                        raise IOError('No groups. file found in '+search_dir)
                    ### Start at 1 (not 0): rows are indexed with the ID column included,
                    ### exactly as in the 'group:sample' branch above
                    for index, s in enumerate(sample_names, 1):
                        g = sample_group_db[s]
                        group_db.setdefault(g, []).append(index)
                        if g not in groups: groups.append(g)
            else:
                key = values[0]
                grouped_floats=[]
                for g in groups:
                    gvalues_list=[]
                    for i in group_db[g]:
                        if i >= len(values):
                            continue ### rstrip() removed trailing empty cells: treat like a blank cell
                        if values[i] == '0':
                            gvalues_list.append('') ### zeros denote missing values
                        else:
                            try: gvalues_list.append(float(values[i]))
                            except ValueError: pass ### non-numeric cell
                    grouped_floats.append(gvalues_list)
                matrix[key] = grouped_floats
                if '\n' not in line:
                    line+='\n' ### last line of a file without a trailing newline
                original_data[key] = line
    return matrix,original_data
Beispiel #3
0
def remoteSashimiPlot(Species,
                      fl,
                      bamdir,
                      eventsToVisualizeFilename,
                      events=None,
                      show=False):
    """Run Sashimiplottting for a dataset and prefix the resulting plots with gene symbols.

    Species -- species code; stored in the global `species` and used in the PSI file name
    fl -- an object exposing CountsFile() and RootDir(), or the dataset root directory
    bamdir, eventsToVisualizeFilename, events -- forwarded to Sashimiplottting
    show -- when true 'TRUE' is written to SashimiPlots/show.txt, otherwise 'FALSE'

    Side effects: sets the globals PSIFilename, outputdir, root_dir, species,
    sample_group_db and (only when a steady-state file exists) steady_state_exp_file;
    creates <root_dir>/SashimiPlots and renames the .pdf/.png files inside it.
    Raises IOError when ExpressionInput holds no non-steady-state 'exp.' file.
    """
    import errno
    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    global sample_group_db
    species = Species

    ### NOTE(review): None is forwarded when no counts file can be located (this used to be
    ### an UnboundLocalError) - confirm Sashimiplottting tolerates a missing counts file
    countinp = None
    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        ### fl is not a file-location object: treat it as the root directory itself
        root_dir = fl
        search_dir = root_dir + '/ExpressionInput'
        for entry in unique.read_directory(search_dir):
            if 'counts.' in entry and 'steady-state.txt' not in entry:
                countinp = search_dir + '/' + entry

    PSIFilename = root_dir + '/AltResults/AlternativeOutput/' + species + '_RNASeq_top_alt_junctions-PSI.txt'

    import ExpressionBuilder
    input_dir = root_dir + '/ExpressionInput'
    exp_file = None
    for entry in unique.read_directory(input_dir):
        if 'exp.' not in entry:
            continue
        if 'steady-state' in entry:
            steady_state_exp_file = input_dir + '/' + entry
        else:
            exp_file = input_dir + '/' + entry
    if exp_file is None:
        raise IOError('No exp. file found in ' + input_dir)
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    outputdir = root_dir + '/SashimiPlots'
    try:
        os.mkdir(unique.filepath(outputdir))
    except Exception:
        pass  ### best effort, e.g. the directory already exists

    with open(outputdir + '/show.txt', 'w') as s:
        s.write('TRUE' if show else 'FALSE')

    geneSymbol_db = Sashimiplottting(bamdir,
                                     countinp,
                                     PSIFilename,
                                     eventsToVisualizeFilename,
                                     events=events)
    for filename in os.listdir(outputdir):
        if '.pdf' not in filename and '.png' not in filename:
            continue
        gene = filename.replace('.pdf', '').replace('.png', '').split('__')[0]
        if gene not in geneSymbol_db:
            continue
        new_filename = str(filename)
        ### NOTE(review): only the second delimited piece of the name is kept
        if '__' in filename:
            new_filename = filename.split('__')[1]
        elif '\\' in filename:
            new_filename = filename.split('\\')[1]
        elif '/' in filename:
            new_filename = filename.split('/')[1]
        nnname = geneSymbol_db[gene][0] + '-SashimiPlot_' + new_filename
        source = os.path.join(outputdir, filename)
        target = os.path.join(outputdir, nnname)
        try:
            os.rename(source, target)
        except Exception as error:
            ### Check errno rather than the traceback text, which is localized by the OS
            if getattr(error, 'errno', None) == errno.EEXIST:
                ### File already exists, delete the old one
                try:
                    os.remove(target)
                except Exception:
                    pass
                ### Now write the new one
                try:
                    os.rename(source, target)
                except Exception:
                    pass
    print('')
Beispiel #4
0
def sashmi_plot_list(bamdir,
                     eventsToVisualizeFilename,
                     PSIFilename,
                     events=None):
    """Submit the requested splicing events to SashimiPlot in up to three passes.

    bamdir -- forwarded unchanged to formatAndSubmitSplicingEventsToSashimiPlot
    eventsToVisualizeFilename -- file the events are imported from when `events` is None
    PSIFilename -- value source for the first and the last pass
    events -- optional list of event IDs ('gene__...'); an entry whose leading field is a
              known gene symbol is replaced IN PLACE by the matching Ensembl gene ID

    Reads the module globals species, root_dir and steady_state_exp_file.
    Returns the gene -> symbol dictionary (empty when the gene annotations could not be
    loaded).
    Raises NameError (message starts with 'forceNoCompatibleEventsInFile') when there is
    nothing to plot.
    """
    try:
        import gene_associations
        from import_scripts import OBO_import
        gene_to_symbol = gene_associations.getGeneToUid(
            species, ('hide', 'Ensembl-Symbol'))
        symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)
    except Exception:
        ### Both maps must exist: gene_to_symbol is consulted below and returned, so leaving
        ### it undefined turned this fallback into a NameError
        gene_to_symbol = {}
        symbol_to_gene = {}

    if events is None:
        splicing_events, expandedSearch = importSplicingEventsToVisualize(
            eventsToVisualizeFilename)
    else:
        expandedSearch = True
        for i, event in enumerate(events):
            gene = event.split('__')[0]
            if gene in gene_to_symbol:
                continue  ### already a known gene ID, nothing to translate
            if ('ENS' not in gene or 'G0000' in gene) and gene in symbol_to_gene:
                ### translate this ID to an Ensembl gene ID for proper SashimiPlot lookup
                events[i] = symbol_to_gene[gene][0]
        splicing_events = events  ### optionally get from supplied variable

    if len(splicing_events) == 0:
        print(eventsToVisualizeFilename)
        ### The original forced this crash by evaluating an undefined name; the exception
        ### type and the marker text are kept so existing handlers keep working
        raise NameError(
            'forceNoCompatibleEventsInFile: no splicing events available to plot')

    ### Under Python 2 the trailing comma keeps later output on the same line
    print('Exporting plots'),

    ### Determine Groups for Coloring
    groups_file = None
    for entry in unique.read_directory(root_dir + '/ExpressionInput'):
        if 'groups.' in entry:
            groups_file = root_dir + '/ExpressionInput/' + entry  ### the last match wins

    ### Defaults used when no groups file exists or it cannot be imported. Previously the
    ### failure path left sample_group_db unbound and crashed on the first submit call.
    sample_group_db = {}
    groups = ['None']
    if groups_file is not None:
        try:
            import ExpressionBuilder
            imported_groups = ExpressionBuilder.simplerGroupImport(groups_file)
            ordered_groups = []
            for sample in imported_groups:
                if imported_groups[sample] not in ordered_groups:
                    ### unique groups, first-seen order
                    ordered_groups.append(imported_groups[sample])
            sample_group_db = imported_groups
            groups = ordered_groups
        except Exception:
            pass  ### best effort: continue with the ungrouped defaults

    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
        PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
    mopup_events = getMopUpEvents(splicing_events, processed_events)

    ### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
        steady_state_exp_file, bamdir, mopup_events, sample_group_db, groups,
        expandedSearch)
    if len(processed_events) > 0:
        mopup_events = getMopUpEvents(mopup_events, processed_events)
        processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
            PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
    return gene_to_symbol
Beispiel #5
0
def remoteSashimiPlot(Species, fl, bamdir, eventsToVisualizeFilename, events=None, show=False):
    """Run Sashimiplottting for a dataset and prefix the resulting plots with gene symbols.

    Species -- species code; stored in the global `species` and used in the PSI file name
    fl -- an object exposing CountsFile() and RootDir(), or the dataset root directory
    bamdir, eventsToVisualizeFilename, events -- forwarded to Sashimiplottting
    show -- when true "TRUE" is written to SashimiPlots/show.txt, otherwise "FALSE"

    Side effects: sets the globals PSIFilename, outputdir, root_dir, species,
    sample_group_db and (only when a steady-state file exists) steady_state_exp_file;
    creates <root_dir>/SashimiPlots and renames the .pdf/.png files inside it.
    Raises IOError when ExpressionInput holds no non-steady-state "exp." file.
    """
    import errno

    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    global sample_group_db
    species = Species

    # NOTE(review): None is forwarded when no counts file can be located (this used to be
    # an UnboundLocalError) - confirm Sashimiplottting tolerates a missing counts file
    countinp = None
    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        # fl is not a file-location object: treat it as the root directory itself
        root_dir = fl
        search_dir = root_dir + "/ExpressionInput"
        for entry in unique.read_directory(search_dir):
            if "counts." in entry and "steady-state.txt" not in entry:
                countinp = search_dir + "/" + entry

    PSIFilename = root_dir + "/AltResults/AlternativeOutput/" + species + "_RNASeq_top_alt_junctions-PSI.txt"

    import ExpressionBuilder

    input_dir = root_dir + "/ExpressionInput"
    exp_file = None
    for entry in unique.read_directory(input_dir):
        if "exp." not in entry:
            continue
        if "steady-state" in entry:
            steady_state_exp_file = input_dir + "/" + entry
        else:
            exp_file = input_dir + "/" + entry
    if exp_file is None:
        raise IOError("No exp. file found in " + input_dir)
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    outputdir = root_dir + "/SashimiPlots"
    try:
        os.mkdir(unique.filepath(outputdir))
    except Exception:
        pass  # best effort, e.g. the directory already exists

    with open(outputdir + "/show.txt", "w") as s:
        s.write("TRUE" if show else "FALSE")

    geneSymbol_db = Sashimiplottting(bamdir, countinp, PSIFilename, eventsToVisualizeFilename, events=events)
    for filename in os.listdir(outputdir):
        if ".pdf" not in filename and ".png" not in filename:
            continue
        gene = filename.replace(".pdf", "").replace(".png", "").split("__")[0]
        if gene not in geneSymbol_db:
            continue
        new_filename = str(filename)
        # NOTE(review): only the second delimited piece of the name is kept
        if "__" in filename:
            new_filename = filename.split("__")[1]
        elif "\\" in filename:
            new_filename = filename.split("\\")[1]
        elif "/" in filename:
            new_filename = filename.split("/")[1]
        nnname = geneSymbol_db[gene][0] + "-SashimiPlot_" + new_filename
        source = os.path.join(outputdir, filename)
        target = os.path.join(outputdir, nnname)
        try:
            os.rename(source, target)
        except Exception as error:
            # Check errno rather than the traceback text, which is localized by the OS
            if getattr(error, "errno", None) == errno.EEXIST:
                # File already exists, delete the old one
                try:
                    os.remove(target)
                except Exception:
                    pass
                # Now write the new one
                try:
                    os.rename(source, target)
                except Exception:
                    pass
    print("")
Beispiel #6
0
def remoteSashimiPlot(Species,fl,bamdir,eventsToVisualizeFilename,events=None,show=False):
    """Run Sashimiplottting for a dataset and prefix the resulting plots with gene symbols.

    Species -- species code; stored in the global `species` and used in the PSI file name
    fl -- an object exposing CountsFile() and RootDir(), or the dataset root directory
    bamdir, eventsToVisualizeFilename, events -- forwarded to Sashimiplottting
    show -- when true 'TRUE' is written to SashimiPlots/show.txt, otherwise 'FALSE'

    Side effects: sets the globals PSIFilename, outputdir, root_dir, species,
    sample_group_db and (only when a steady-state file exists) steady_state_exp_file;
    creates <root_dir>/SashimiPlots and renames the .pdf/.png files inside it.
    Raises IOError when ExpressionInput holds no non-steady-state 'exp.' file.
    """
    import errno
    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    global sample_group_db
    species = Species

    ### NOTE(review): None is forwarded when no counts file can be located (this used to be
    ### an UnboundLocalError) - confirm Sashimiplottting tolerates a missing counts file
    countinp = None
    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        ### fl is not a file-location object: treat it as the root directory itself
        root_dir = fl
        search_dir = root_dir+'/ExpressionInput'
        for entry in unique.read_directory(search_dir):
            if 'counts.' in entry and 'steady-state.txt' not in entry:
                countinp = search_dir+'/'+entry

    PSIFilename = root_dir+'/AltResults/AlternativeOutput/'+species+'_RNASeq_top_alt_junctions-PSI.txt'

    import ExpressionBuilder
    input_dir = root_dir+'/ExpressionInput'
    exp_file = None
    for entry in unique.read_directory(input_dir):
        if 'exp.' not in entry: continue
        if 'steady-state' in entry:
            steady_state_exp_file = input_dir+'/'+entry
        else:
            exp_file = input_dir+'/'+entry
    if exp_file is None:
        raise IOError('No exp. file found in '+input_dir)
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    outputdir = root_dir+'/SashimiPlots'
    try: os.mkdir(unique.filepath(outputdir))
    except Exception: pass ### best effort, e.g. the directory already exists

    with open(outputdir+'/show.txt','w') as s:
        s.write('TRUE' if show else 'FALSE')

    geneSymbol_db=Sashimiplottting(bamdir,countinp,PSIFilename,eventsToVisualizeFilename,events=events)
    for filename in os.listdir(outputdir):
        if '.pdf' not in filename and '.png' not in filename: continue
        gene = filename.replace('.pdf','').replace('.png','').split('__')[0]
        if gene not in geneSymbol_db: continue
        new_filename = str(filename)
        ### NOTE(review): only the second delimited piece of the name is kept
        if '__' in filename:
            new_filename = filename.split('__')[1]
        elif '\\' in filename:
            new_filename = filename.split('\\')[1]
        elif '/' in filename:
            new_filename = filename.split('/')[1]
        nnname=geneSymbol_db[gene][0]+'-SashimiPlot_'+new_filename
        source = os.path.join(outputdir, filename)
        target = os.path.join(outputdir, nnname)
        try: os.rename(source, target)
        except Exception as error:
            ### Check errno rather than the traceback text, which is localized by the OS
            if getattr(error,'errno',None) == errno.EEXIST:
                ### File already exists, delete the old one
                try: os.remove(target)
                except Exception: pass
                ### Now write the new one
                try: os.rename(source, target)
                except Exception: pass
    print('')
Beispiel #7
0
def matrixImport(filename):
    """Read a tab-delimited matrix and bucket every row's values by sample group.

    The first line is the header. When it contains ':' each sample column must be
    labelled 'group:sample'; otherwise the sample -> group mapping is imported from a
    'groups.*.txt' file in the ExpressionInput directory derived from `filename` and
    samples missing from that mapping are ignored.

    Returns (matrix, compared_groups, original_data):
      matrix -- row ID -> list (one entry per group, in header order) of float lists;
                cells equal to '0', non-numeric cells and absent cells are skipped
      compared_groups -- row ID -> groups that contributed more than one value
      original_data -- 'header' and every row ID -> the raw input line (newline-terminated
                for data rows)

    Raises IOError when the groups file is needed but none is found.
    """
    matrix={}
    compared_groups={} ### track which values correspond to which groups for pairwise group comparisons
    original_data={}
    group_db={} ### group -> indexes into a row AFTER its leading ID column is removed
    groups=[] ### group names in order of first appearance

    try: source = open(filename,'rU')
    except ValueError: source = open(filename,'r') ### the 'U' flag no longer exists in Python 3.11+

    with source: ### guarantees the handle is closed, also on errors
        headerRow=True
        for line in source:
            data = line.rstrip()
            values = data.split('\t')
            if headerRow:
                headerRow = False
                original_data['header'] = line
                sample_names = values[1:]
                if ':' in data:
                    ### Indexes start at 0 because data rows drop the ID column before they
                    ### are indexed; starting at 1 read the neighbouring sample's value and
                    ### silently lost the last sample
                    for index, label in enumerate(sample_names):
                        g, _sample = label.split(':')
                        group_db.setdefault(g, []).append(index)
                        if g not in groups: groups.append(g)
                else:
                    import ExpressionBuilder
                    search_dir = filename.split('AltResults')[0]+'ExpressionInput'
                    sample_group_db = None
                    for entry in unique.read_directory(search_dir):
                        if 'groups.' in entry and '.txt' in entry:
                            sample_group_db = ExpressionBuilder.simplerGroupImport(search_dir+'/'+entry)
                    if sample_group_db is None and sample_names:
                        raise IOError('No groups. file found in '+search_dir)
                    for index, s in enumerate(sample_names):
                        if s not in sample_group_db: continue ### sample without a group assignment
                        g = sample_group_db[s]
                        group_db.setdefault(g, []).append(index)
                        if g not in groups: groups.append(g)
            else:
                key = values[0]
                values = values[1:]
                grouped_floats=[]
                associated_groups=[]
                for g in groups:
                    gvalues_list=[]
                    for i in group_db[g]:
                        ### short rows (rstrip() removes trailing empty cells) and zeros are missing values
                        if i >= len(values) or values[i] == '0': continue
                        try: gvalues_list.append(float(values[i]))
                        except ValueError: pass ### non-numeric cell
                    grouped_floats.append(gvalues_list)
                    if len(gvalues_list)>1:
                        associated_groups.append(g)
                matrix[key] = grouped_floats
                compared_groups[key] = associated_groups
                if '\n' not in line:
                    line+='\n' ### last line of a file without a trailing newline
                original_data[key] = line
    return matrix,compared_groups,original_data
Beispiel #8
0
def matrixImport(filename):
    """Import a tab-delimited value matrix and split each row's values into sample groups.

    Header handling: if the header line contains ':' every sample column is expected to
    read 'group:sample'. If not, a 'groups.*.txt' file from the ExpressionInput directory
    (derived from `filename`) supplies the sample -> group mapping, and header samples
    that are not in the mapping are ignored.

    Returns a 3-tuple:
      matrix -- row ID -> one list of floats per group (groups in header order); cells
                that are '0', non-numeric or absent from the row are left out
      compared_groups -- row ID -> the groups holding more than one value for that row
      original_data -- 'header' plus each row ID -> the unmodified input line (data rows
                always end with a newline)

    Raises IOError when the groups file is required but cannot be found.
    """
    matrix={}
    compared_groups={} ### track which values correspond to which groups for pairwise group comparisons
    original_data={}
    group_db={} ### group -> positions within a row once the leading ID column is dropped
    groups=[] ### groups, ordered by first occurrence in the header

    try: handle = open(filename,'rU')
    except ValueError: handle = open(filename,'r') ### Python 3.11+ rejects the 'U' mode flag

    with handle: ### close the file even when parsing fails
        is_header=True
        for line in handle:
            data = line.rstrip()
            values = data.split('\t')
            if is_header:
                is_header = False
                original_data['header'] = line
                sample_names = values[1:]
                if ':' in data:
                    ### Positions are 0-based: data rows are indexed after their ID column
                    ### is removed. Counting from 1 shifted every group one column to the
                    ### right and dropped the final sample without any error
                    for position, label in enumerate(sample_names):
                        g, _sample = label.split(':')
                        group_db.setdefault(g, []).append(position)
                        if g not in groups: groups.append(g)
                else:
                    import ExpressionBuilder
                    search_dir = filename.split('AltResults')[0]+'ExpressionInput'
                    sample_group_db = None
                    for entry in unique.read_directory(search_dir):
                        if 'groups.' in entry and '.txt' in entry:
                            sample_group_db = ExpressionBuilder.simplerGroupImport(search_dir+'/'+entry)
                    if sample_group_db is None and sample_names:
                        raise IOError('No groups. file found in '+search_dir)
                    for position, s in enumerate(sample_names):
                        if s not in sample_group_db: continue ### no group known for this sample
                        g = sample_group_db[s]
                        group_db.setdefault(g, []).append(position)
                        if g not in groups: groups.append(g)
            else:
                key = values[0]
                values = values[1:]
                grouped_floats=[]
                associated_groups=[]
                for g in groups:
                    gvalues_list=[]
                    for i in group_db[g]:
                        ### rows can be shorter than the header because rstrip() removes
                        ### trailing empty cells; those and zeros count as missing
                        if i >= len(values) or values[i] == '0': continue
                        try: gvalues_list.append(float(values[i]))
                        except ValueError: pass ### cell is not a number
                    grouped_floats.append(gvalues_list)
                    if len(gvalues_list)>1:
                        associated_groups.append(g)
                matrix[key] = grouped_floats
                compared_groups[key] = associated_groups
                if '\n' not in line:
                    line+='\n' ### final line of a file lacking a trailing newline
                original_data[key] = line
    return matrix,compared_groups,original_data