Ejemplo n.º 1
0
def tiers2xls(tier_dirs, output_dir):
    """Merge the tier 0-4 variant tables of each directory into one XLS workbook.

    For every directory in ``tier_dirs`` a worksheet is added, named with the
    value ``general_utils.get_file_or_dir_name`` returns for that directory.
    The five tab-separated files ``tiering_allvars.tier0.txt`` ..
    ``tiering_allvars.tier4.txt`` in the directory are appended to the sheet
    in tier order: one spreadsheet row per line, one cell per tab-separated
    field.  The first line of the tier 0 file is written (it is treated as
    the header); the first line of every later tier file is skipped.

    Args:
        tier_dirs: Iterable of directory paths, each holding the five tier
            files named above.
        output_dir: Directory in which ``tiered_output.xls`` is saved.

    Returns:
        The path of the saved workbook.

    Raises:
        IOError: If one of the tier files cannot be opened.
    """
    wb = xlwt.Workbook()

    for tier_dir in tier_dirs:
        sheet = wb.add_sheet(general_utils.get_file_or_dir_name(tier_dir))
        row_num = 0  # next row to write to within this sheet
        tier_files = [
            os.path.join(tier_dir, 'tiering_allvars.tier%d.txt' % tier)
            for tier in range(5)
        ]

        for idx, tier_file in enumerate(tier_files):
            # The context manager closes each tier file as soon as it has
            # been copied, instead of leaving the handle open.
            with open(tier_file, 'r') as tierh:
                if idx != 0:
                    # Only the first file contributes its header line.
                    tierh.readline()

                for line in tierh:
                    fields = line.rstrip("\n").split("\t")
                    for col, value in enumerate(fields):
                        sheet.write(row_num, col, value)
                    row_num += 1

    # Save the excel file.
    out_path = os.path.join(output_dir, 'tiered_output.xls')
    wb.save(out_path)
    return out_path
Ejemplo n.º 2
0
def tiers2xls(tier_dirs, output_dir, yaml_commands):
    """Merge the tier 0-4 variant tables of each directory into one XLS workbook.

    For every directory in ``tier_dirs`` a worksheet is added, named with the
    value ``general_utils.get_file_or_dir_name`` returns for that directory.
    The five tab-separated files ``tiering_allvars.tier0.txt`` ..
    ``tiering_allvars.tier4.txt`` in the directory are appended to the sheet
    in tier order: one spreadsheet row per line, one cell per tab-separated
    field.  The first line of the tier 0 file is written (it is treated as
    the header); the first line of every later tier file is skipped.

    At most ``kTMaxNumVariantsPerTier`` non-header lines are copied from each
    tier file; the rest are omitted after a single warning.  Cell values
    longer than 32767 characters are truncated with a warning.

    Args:
        tier_dirs: Iterable of directory paths, each holding the five tier
            files named above.
        output_dir: Directory in which ``tiered_output.xls`` is saved.
        yaml_commands: Nested mapping read as
            ``yaml_commands[kModules][kTiering][kTMaxNumVariantsPerTier]``
            (keys taken from ``yaml_keys``) to obtain the per-tier limit.

    Returns:
        The path of the saved workbook.

    Raises:
        IOError: If one of the tier files cannot be opened.
        KeyError: If the per-tier limit is missing from ``yaml_commands``.
    """
    # A parenthesised single-string print gives the same output whether
    # print is a statement (Python 2) or a function (Python 3).
    print('Saving tiered variants to Excel worksheet')

    # latin-1 is needed since our SQLite database gives results using latin-1
    # encoding; with the default workbook encoding we would otherwise get
    # UnicodeDecode errors on certain characters.
    wb = xlwt.Workbook(encoding='latin-1')

    max_cell_chars = 32767  # longest text written to a single cell
    # Looked up once instead of once per cell.
    max_variants = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering][
        yaml_keys.kTMaxNumVariantsPerTier]

    for tier_dir in tier_dirs:
        sheet = wb.add_sheet(general_utils.get_file_or_dir_name(tier_dir))
        row_num = 0  # next row to write to within this sheet
        tier_files = [
            os.path.join(tier_dir, 'tiering_allvars.tier%d.txt' % tier)
            for tier in range(5)
        ]

        for idx, tier_file in enumerate(tier_files):
            # Only the first file keeps its header line in the stream.
            header_in_stream = (idx == 0)

            with open(tier_file, 'r') as tierh:
                if not header_in_stream:
                    tierh.readline()

                for line_num, line in enumerate(tierh):
                    # Count variants the same way for every tier: the
                    # header (when present) is line 0 and is not a variant.
                    if header_in_stream:
                        variant_num = line_num
                    else:
                        variant_num = line_num + 1

                    # Checked per line, before any cell is written, so an
                    # omitted variant never consumes a spreadsheet row.
                    if variant_num > max_variants:
                        print('warning: number of variants exceeded for ' +
                              str(tier_file) +
                              '. Omitting remaining variants in this tier from XLS file. Current max variants per tier: '
                              + str(max_variants) +
                              '. To output more variants per tier, modify this value in modules.yml.')
                        break

                    fields = line.rstrip("\n").split("\t")
                    for col, value in enumerate(fields):
                        if len(value) > max_cell_chars:
                            print('warning: tierFile ' + str(tier_file) +
                                  ' line ' + str(line_num) + ' col ' +
                                  str(col) + ' has length ' +
                                  str(len(value)) +
                                  ' longer than 32767 chars. Truncating.')
                            value = value[:max_cell_chars]
                        sheet.write(row_num, col, value)

                    row_num += 1

    # Save the excel file.
    out_path = os.path.join(output_dir, 'tiered_output.xls')
    wb.save(out_path)
    return out_path
Ejemplo n.º 3
0
def tiers2xls(tier_dirs, output_dir, yaml_commands):
    """Merge the tier 0-4 variant tables of each directory into one XLS workbook.

    For every directory in ``tier_dirs`` a worksheet is added, named with the
    value ``general_utils.get_file_or_dir_name`` returns for that directory.
    The five tab-separated files ``tiering_allvars.tier0.txt`` ..
    ``tiering_allvars.tier4.txt`` in the directory are appended to the sheet
    in tier order: one spreadsheet row per line, one cell per tab-separated
    field.  The first line of the tier 0 file is written (it is treated as
    the header); the first line of every later tier file is skipped.

    At most ``kTMaxNumVariantsPerTier`` non-header lines are copied from each
    tier file; the rest are omitted after a single warning.  Cell values
    longer than 32767 characters are truncated with a warning.

    Args:
        tier_dirs: Iterable of directory paths, each holding the five tier
            files named above.
        output_dir: Directory in which ``tiered_output.xls`` is saved.
        yaml_commands: Nested mapping read as
            ``yaml_commands[kModules][kTiering][kTMaxNumVariantsPerTier]``
            (keys taken from ``yaml_keys``) to obtain the per-tier limit.

    Returns:
        The path of the saved workbook.

    Raises:
        IOError: If one of the tier files cannot be opened.
        KeyError: If the per-tier limit is missing from ``yaml_commands``.
    """
    # A parenthesised single-string print gives the same output whether
    # print is a statement (Python 2) or a function (Python 3).
    print('Saving tiered variants to Excel worksheet')

    # latin-1 is needed since our SQLite database gives results using latin-1
    # encoding; with the default workbook encoding we would otherwise get
    # UnicodeDecode errors on certain characters.
    wb = xlwt.Workbook(encoding='latin-1')

    max_cell_chars = 32767  # longest text written to a single cell
    # Looked up once instead of once per cell.
    max_variants = yaml_commands[yaml_keys.kModules][yaml_keys.kTiering][yaml_keys.kTMaxNumVariantsPerTier]

    for tier_dir in tier_dirs:
        sheet = wb.add_sheet(general_utils.get_file_or_dir_name(tier_dir))
        row_num = 0  # next row to write to within this sheet
        tier_files = [os.path.join(tier_dir, 'tiering_allvars.tier%d.txt' % tier) for tier in range(5)]

        for idx, tier_file in enumerate(tier_files):
            # Only the first file keeps its header line in the stream.
            header_in_stream = (idx == 0)

            with open(tier_file, 'r') as tierh:
                if not header_in_stream:
                    tierh.readline()

                for line_num, line in enumerate(tierh):
                    # Count variants the same way for every tier: the
                    # header (when present) is line 0 and is not a variant.
                    variant_num = line_num if header_in_stream else line_num + 1

                    # Checked per line, before any cell is written, so an
                    # omitted variant never consumes a spreadsheet row.
                    if variant_num > max_variants:
                        print('warning: number of variants exceeded for ' + str(tier_file) + '. Omitting remaining variants in this tier from XLS file. Current max variants per tier: ' + str(max_variants) + '. To output more variants per tier, modify this value in modules.yml.')
                        break

                    for col, value in enumerate(line.rstrip("\n").split("\t")):
                        if len(value) > max_cell_chars:
                            print('warning: tierFile ' + str(tier_file) + ' line ' + str(line_num) + ' col ' + str(col) + ' has length ' + str(len(value)) + ' longer than 32767 chars. Truncating.')
                            value = value[:max_cell_chars]
                        sheet.write(row_num, col, value)

                    row_num += 1

    # Save the excel file.
    out_path = os.path.join(output_dir, 'tiered_output.xls')
    wb.save(out_path)
    return out_path