Beispiel #1
0
def extract_zhang(file_name, type):
    """Print the name -> type mapping found in one Zhang table.

    The table ``zhang/final/<file_name>`` is loaded with ``TableLoader`` and
    every distinct value of its ``"name"`` field is associated with ``type``.
    The file name, a separator line, any warnings and finally the collected
    mapping are written to stdout. Nothing is returned.

    Args:
        file_name: Name of the table file, relative to ``zhang/final/``.
        type: Label recorded for every name in the file. The parameter
            shadows the builtin but is kept so existing callers still work.
    """
    names_to_types = {}

    loader = TableLoader()

    print(file_name)
    print("*" * 100)

    for entry in loader.load("zhang/final/%s" % file_name):
        name = entry["name"]

        if "/" in name:
            print("%s might not be a srna type" % (name,))

        # ``in`` replaces the deprecated dict.has_key().
        if name in names_to_types:
            print("[warning] multiple entries of the same name")
        else:
            names_to_types[name] = type

    for key, value in names_to_types.items():
        # The first part ends with a tab, so the Python 2 print statement
        # used previously emitted no extra space before the second part.
        print("name: %s\t|\t" % key + "type: %s" % value)
Beispiel #2
0
def extract_zhang(file_name, type):
    """Report which names appear in a Zhang table and the type given to them.

    Loads ``zhang/final/<file_name>`` through ``TableLoader``, maps each
    distinct ``"name"`` value to ``type`` and prints the result. Names that
    contain ``"/"`` and names that occur more than once are reported on
    stdout. The function returns ``None``.

    Args:
        file_name: Table file name, relative to ``zhang/final/``.
        type: Label stored for each name (shadows the builtin; the name is
            kept for backward compatibility with keyword callers).
    """
    names_to_types = {}

    loader = TableLoader()

    print(file_name)
    print("*" * 100)

    entries = loader.load("zhang/final/%s" % file_name)

    for entry in entries:
        name = entry["name"]

        if "/" in name:
            print("%s might not be a srna type" % (name,))

        # Membership test instead of the deprecated dict.has_key().
        if name not in names_to_types:
            names_to_types[name] = type
        else:
            print("[warning] multiple entries of the same name")

    for key, value in names_to_types.items():
        # Concatenated without a separator: the Python 2 print statement
        # inserted no space after an item ending in a tab.
        print("name: %s\t|\t" % key + "type: %s" % value)
Beispiel #3
0
def extract_bilusic(file_name, type):
    """Print the name -> type mapping found in one Bilusic table.

    Loads ``bilusic/final/<file_name>`` through ``TableLoader``. Each
    distinct ``"name"`` is mapped to ``"5'utr"`` or ``"3'utr"`` when the name
    contains that substring, and to ``type`` otherwise. Repeated names are
    reported and keep their first assignment. Output goes to stdout; nothing
    is returned.

    Args:
        file_name: Table file name, relative to ``bilusic/final/``.
        type: Default label for names that carry no UTR marker (shadows the
            builtin; kept for caller compatibility).
    """
    names_to_types = {}

    loader = TableLoader()

    print(file_name)
    print("*" * 100)

    for entry in loader.load("bilusic/final/%s" % file_name):
        name = entry["name"]

        # ``in`` replaces the deprecated dict.has_key().
        if name in names_to_types:
            print("[warning] multiple entries of the same name")
            continue

        # A UTR marker inside the name overrides the caller-supplied type.
        if "5'utr" in name:
            names_to_types[name] = "5'utr"
        elif "3'utr" in name:
            names_to_types[name] = "3'utr"
        else:
            names_to_types[name] = type

    for key, value in names_to_types.items():
        # The first part ends with a tab, so the Python 2 print statement
        # used previously emitted no extra space before the second part.
        print("name: %s\t|\t" % key + "type: %s" % value)
Beispiel #4
0
def extract_bilusic(file_name, type):
    """Report the type assigned to every name in a Bilusic table.

    The table ``bilusic/final/<file_name>`` is read with ``TableLoader``.
    A name containing ``"5'utr"`` or ``"3'utr"`` is labelled with that
    marker; every other name is labelled with ``type``. Only the first
    occurrence of a name is recorded, later ones produce a warning. The
    mapping is printed to stdout and the function returns ``None``.

    Args:
        file_name: Table file name, relative to ``bilusic/final/``.
        type: Fallback label (shadows the builtin; the name is preserved for
            backward compatibility).
    """
    names_to_types = {}

    loader = TableLoader()

    print(file_name)
    print("*" * 100)

    entries = loader.load("bilusic/final/%s" % file_name)

    for entry in entries:
        name = entry["name"]

        # Membership test instead of the deprecated dict.has_key().
        if name not in names_to_types:
            if "5'utr" in name:
                label = "5'utr"
            elif "3'utr" in name:
                label = "3'utr"
            else:
                label = type
            names_to_types[name] = label
        else:
            print("[warning] multiple entries of the same name")

    for key, value in names_to_types.items():
        # Concatenated without a separator: the Python 2 print statement
        # inserted no space after an item ending in a tab.
        print("name: %s\t|\t" % key + "type: %s" % value)
Beispiel #5
0
def extract_our_gens():
    """Print the name -> type mapping gathered from our interaction files.

    Every file listed below is loaded from ``our_files/`` with
    ``TableLoader``. For each entry, the ``"rna1 name"`` is mapped to
    ``"first_type"`` and the ``"rna2 name"`` to ``"second_type"``; the first
    type seen for a name wins. File names are echoed while loading and the
    final mapping is printed to stdout. Nothing is returned.
    """
    files = [
        "assign-type-to-all-chimeras-of-Iron_limitation_CL_FLAG207_208_305_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-Log_phase_CL_FLAG101-104_108_109_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-MG_hfq-WT101_cutadapt_bwa.bam_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-MG_hfq-wt202_CL_Stationary_cutadapt_bwa.bam_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-Stationary_CL_FLAG209_210_312_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-signif-chimeras-of-Iron_limitation_CL_FLAG207_208_305_all_fragments_l25.txt_sig_interactions.with-type",
        "assign-type-to-signif-chimeras-of-Log_phase_CL_FLAG101-104_108_109_all_fragments_l25.txt_sig_interactions.with-type",
        "assign-type-to-signif-chimeras-of-Stationary_CL_FLAG209_210_312_all_fragments_l25.txt_sig_interactions.with-type",
        "assign-type-to-single-counts-of-Iron_limitation_CL_FLAG207_208_305_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-Log_phase_CL_FLAG101-104_108_109_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-MG_hfq-WT101_cutadapt_bwa.bam_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-MG_hfq-wt202_CL_Stationary_cutadapt_bwa.bam_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-Stationary_CL_FLAG209_210_312_all_fragments_l25.txt_single_counts.with-type",
    ]

    # (name column, type column) for the two RNAs of an interaction.
    name_and_type_fields = (
        ("rna1 name", "first_type"),
        ("rna2 name", "second_type"),
    )

    names_to_types = {}

    loader = TableLoader()

    for file_name in files:
        print(file_name)
        print("*" * 100)

        for entry in loader.load("our_files/%s" % file_name):
            for name_field, type_field in name_and_type_fields:
                name = entry[name_field]

                # ``in`` replaces the deprecated dict.has_key(); the type
                # column is only read for names not seen before.
                if name not in names_to_types:
                    names_to_types[name] = entry[type_field]

    for key, value in names_to_types.items():
        # The first part ends with a tab, so the Python 2 print statement
        # used previously emitted no extra space before the second part.
        print("name: %s\t|\t" % key + "type: %s" % value)
Beispiel #6
0
def extract_our_gens():
    """Collect and print the RNA name -> type mapping of our result files.

    Each listed file is loaded from ``our_files/`` via ``TableLoader``. Both
    RNAs of every entry are considered: ``"rna1 name"`` with ``"first_type"``
    and ``"rna2 name"`` with ``"second_type"``. A name keeps the type it was
    first seen with. Progress and the resulting mapping are written to
    stdout; the function returns ``None``.
    """
    files = [
        "assign-type-to-all-chimeras-of-Iron_limitation_CL_FLAG207_208_305_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-Log_phase_CL_FLAG101-104_108_109_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-MG_hfq-WT101_cutadapt_bwa.bam_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-MG_hfq-wt202_CL_Stationary_cutadapt_bwa.bam_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-all-chimeras-of-Stationary_CL_FLAG209_210_312_all_fragments_l25.txt_all_interactions.with-type",
        "assign-type-to-signif-chimeras-of-Iron_limitation_CL_FLAG207_208_305_all_fragments_l25.txt_sig_interactions.with-type",
        "assign-type-to-signif-chimeras-of-Log_phase_CL_FLAG101-104_108_109_all_fragments_l25.txt_sig_interactions.with-type",
        "assign-type-to-signif-chimeras-of-Stationary_CL_FLAG209_210_312_all_fragments_l25.txt_sig_interactions.with-type",
        "assign-type-to-single-counts-of-Iron_limitation_CL_FLAG207_208_305_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-Log_phase_CL_FLAG101-104_108_109_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-MG_hfq-WT101_cutadapt_bwa.bam_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-MG_hfq-wt202_CL_Stationary_cutadapt_bwa.bam_all_fragments_l25.txt_single_counts.with-type",
        "assign-type-to-single-counts-of-Stationary_CL_FLAG209_210_312_all_fragments_l25.txt_single_counts.with-type"
    ]

    names_to_types = {}

    loader = TableLoader()

    for file_name in files:
        print(file_name)
        print("*" * 100)

        entries = loader.load("our_files/%s" % file_name)

        for entry in entries:
            # Membership tests replace the deprecated dict.has_key(); a name
            # that is already known keeps its first type.
            first_name = entry["rna1 name"]
            if first_name not in names_to_types:
                names_to_types[first_name] = entry["first_type"]

            second_name = entry["rna2 name"]
            if second_name not in names_to_types:
                names_to_types[second_name] = entry["second_type"]

    for key, value in names_to_types.items():
        # Concatenated without a separator: the Python 2 print statement
        # inserted no space after an item ending in a tab.
        print("name: %s\t|\t" % key + "type: %s" % value)
Beispiel #7
0
    def __init__(self, our_tables_list, article_tables_list):
        """Load both groups of tables and keep them on the instance.

        Args:
            our_tables_list: Iterable of ``(name, path)`` pairs; each path is
                read with ``OurTableLoader.loadUnprocessed``.
            article_tables_list: Iterable of ``(name, path)`` pairs; each
                path is read with ``TableLoader.load``.
        """
        our_loader = OurTableLoader()
        our_tables = []
        for table_name, table_path in our_tables_list:
            our_tables.append(
                our_loader.createTable(table_name,
                                       our_loader.loadUnprocessed(table_path)))
        self._our_tables_list = our_tables

        article_loader = TableLoader()
        article_tables = []
        for table_name, table_path in article_tables_list:
            article_tables.append(
                article_loader.createTable(table_name,
                                           article_loader.load(table_path)))
        self._article_tables_list = article_tables
Beispiel #8
0
def generate_table_old(table_path, name, is_our_table=False):
    """Load a table from disk and copy it into a newly created MySQL table.

    The table is loaded with ``OurTableLoader.loadUnprocessed`` when
    ``is_our_table`` is true and with ``TableLoader.load`` otherwise. A MySQL
    table named after ``table.get_name()`` is created in the
    ``article_refactor_24_3_2016`` database with one ``VARCHAR(200)`` column
    per field: the six id components first, then every other field of the
    first stored row. An empty-string field name is skipped. All rows are
    inserted and the transaction is committed.

    The connection and cursor are always closed, also when a statement
    fails. A table without rows yields a MySQL table with just the six id
    columns instead of raising ``IndexError``.

    Args:
        table_path: Path of the table file to load.
        name: Name passed to ``createTable``.
        is_our_table: Selects the loader, see above.
    """
    if is_our_table:
        loader = OurTableLoader()
        table = loader.createTable(name, loader.loadUnprocessed(table_path))
    else:
        loader = TableLoader()
        table = loader.createTable(name, loader.load(table_path))

    db = MySQLdb.connect(host="localhost",
                         user="******",
                         db="article_refactor_24_3_2016")
    try:
        cur = db.cursor(MySQLdb.cursors.DictCursor)
        try:
            # The six components of a row id always come first.
            id_keys = [
                TableGlobals.FIRST_START_BASE_KEY,
                TableGlobals.FIRST_END_BASE_KEY,
                TableGlobals.FIRST_STRAND_KEY,
                TableGlobals.SECOND_START_BASE_KEY,
                TableGlobals.SECOND_END_BASE_KEY,
                TableGlobals.SECOND_STRAND_KEY,
            ]

            # Remaining columns are taken from the first stored row, if any.
            stored_rows = list(table._dctData.values())
            if stored_rows:
                for key in stored_rows[0].keys():
                    if key not in id_keys:
                        id_keys.append(key)

            columns = [key for key in id_keys if key != ""]

            # Identifiers cannot be bound as parameters, so characters that
            # are awkward in a column name are replaced or dropped instead.
            fields = ", ".join(
                "%s VARCHAR(200)" % key.replace(" ", "_").replace(
                    "-", "_").replace("'", "").replace("/", "")
                for key in columns)
            print(fields)
            cur.execute("CREATE TABLE %s (%s)" % (table.get_name(), fields))

            # Copy the id components, parsed from the row key, into each row.
            table_as_list = []
            for key, value in table:
                id_values = key.split(Table.ID_DELIMITER)
                for id_key, id_val in zip(id_keys, id_values):
                    value[id_key] = str(id_val)
                table_as_list.append(value)

            # Values are escaped by db.literal before being interpolated.
            for row in table_as_list:
                values = ",".join(
                    "%s" % db.literal(str(row[key])) for key in columns)
                cur.execute("INSERT INTO %s VALUES (%s)" %
                            (table.get_name(), values))

            db.commit()
        finally:
            cur.close()
    finally:
        db.close()
Beispiel #9
0
def generate_table_old(table_path, name, is_our_table=False):
    """Create a MySQL table from a table file and fill it with the rows.

    ``OurTableLoader.loadUnprocessed`` is used when ``is_our_table`` is true,
    ``TableLoader.load`` otherwise. The MySQL table is named after
    ``table.get_name()`` and lives in the ``article_refactor_24_3_2016``
    database. Its columns, all ``VARCHAR(200)``, are the six id components
    followed by the other fields of the first stored row; a field whose name
    is the empty string is left out. Rows are inserted one by one and
    committed at the end.

    The cursor and the connection are closed on every path, including
    failures. When the loaded table has no rows, only the six id columns are
    created rather than failing with ``IndexError``.

    Args:
        table_path: Path of the table file to load.
        name: Name passed to ``createTable``.
        is_our_table: Selects the loader, see above.
    """
    if not is_our_table:
        loader = TableLoader()
        table = loader.createTable(name, loader.load(table_path))
    else:
        loader = OurTableLoader()
        table = loader.createTable(name, loader.loadUnprocessed(table_path))

    db = MySQLdb.connect(host="localhost", user="******", db="article_refactor_24_3_2016")
    try:
        cur = db.cursor(MySQLdb.cursors.DictCursor)
        try:
            # Generate the keys for the table: id components first.
            id_keys = [TableGlobals.FIRST_START_BASE_KEY,
                       TableGlobals.FIRST_END_BASE_KEY,
                       TableGlobals.FIRST_STRAND_KEY,
                       TableGlobals.SECOND_START_BASE_KEY,
                       TableGlobals.SECOND_END_BASE_KEY,
                       TableGlobals.SECOND_STRAND_KEY]

            # The first stored row, when there is one, supplies the rest.
            stored_rows = list(table._dctData.values())
            if stored_rows:
                for key in stored_rows[0].keys():
                    if key not in id_keys:
                        id_keys.append(key)

            columns = [key for key in id_keys if key != ""]

            # Column names are interpolated (identifiers cannot be bound),
            # so problematic characters are replaced or removed first.
            column_defs = []
            for key in columns:
                column = key.replace(" ", "_").replace("-", "_")
                column = column.replace("'", "").replace("/", "")
                column_defs.append("%s VARCHAR(200)" % column)
            fields = ", ".join(column_defs)
            print(fields)
            cur.execute("CREATE TABLE %s (%s)" % (table.get_name(), fields))

            # Generate a dictionary for each row according to the keys.
            table_as_list = []
            for key, value in table:
                id_values = key.split(Table.ID_DELIMITER)
                for id_key, id_val in zip(id_keys, id_values):
                    value[id_key] = str(id_val)
                table_as_list.append(value)

            # Go over the rows and add them to the db; db.literal escapes
            # each value before it is placed in the statement.
            for row in table_as_list:
                values = ",".join("%s" % db.literal(str(row[key])) for key in columns)
                cur.execute("INSERT INTO %s VALUES (%s)" % (table.get_name(), values))

            db.commit()
        finally:
            cur.close()
    finally:
        db.close()
Beispiel #10
0
def lybecker_update(file_name,
                    show_warnings=True,
                    overlap_delimiter="/",
                    overlap_field="annotation of overlapping genes",
                    adjacent_field="adjacent genes",
                    loader_type=LybeckerS2TableLoader):
    """Re-annotate a Lybecker table against the gene table and dump the result.

    The gene table is loaded from ``./genes.col`` and the Lybecker table from
    ``lybecker/final/<file_name>``. For every record, the genes reported by
    ``gene_table.is_overlaps`` for the record's coordinates are used to
    decide whether the record counts as overlapping a known gene, to assign
    both strand fields and to build a new ``"name"``. The updated rows are
    written with ``dump`` to ``lybecker/final/updated_<file_name>``.

    Args:
        file_name: Table file name, relative to ``lybecker/final/``.
        show_warnings: When true, print per-record diagnostics comparing the
            overlapping genes listed in the table with those found in the
            gene table.
        overlap_delimiter: Separator used inside ``overlap_field`` and
            ``adjacent_field`` values.
        overlap_field: Column holding the names of overlapping genes.
        adjacent_field: Column holding the adjacent genes; only read for
            records whose ``"category"`` is ``"intergenic"``.
        loader_type: Class instantiated without arguments to load the
            Lybecker table.

    Raises:
        BaseException: For an intergenic record when neither of its first
            two adjacent genes is found by name or by other names.

    NOTE(review): entries of the ``is_overlaps`` result are used as
    ``entry[0]`` (a validity flag), ``entry[1][0]`` (a delimited gene id) and
    ``entry[1][1]`` (a gene info dict) - this shape is inferred from usage
    here; confirm against ``is_overlaps``.
    """

    geneLoader = GeneTableLoader()
    gene_table = geneLoader.createTable("genes", geneLoader.loadUnprocessed("./genes.col"))

    loader = loader_type()
    table = loader.createTable("lybecker", loader.load("lybecker/final/%s" % file_name))

    # Output rows, one dict per input record.
    new_table_raw = []

    for id, info in table:

        dct = {}

        # The unique id field is dropped so it is not copied into the output.
        info.pop(Table.UNIQUE_ID_FIELD)
        # NOTE(review): ";" is hard-coded here while Table.ID_DELIMITER is
        # used below - presumably the same character; confirm.
        start, end, strand = id.split(";")[:3]
        # print info["name"]
        start = int(start)
        end = int(end)

        result = gene_table.is_overlaps(start, end, "none")

        # Diagnostics only: nothing in this branch changes the output row.
        if show_warnings:
            print id
            overlaps = [gene for gene in info[overlap_field].split(overlap_delimiter) if gene != ""]
            print overlaps

            for index, gene in enumerate(result):
                print "%d: %s" % (index, gene[1][1]["name"])
                if gene[1][1]["name"] not in overlaps:
                    print "[warning] older name is being used"

            if len(overlaps) > len(result):
                print "[warning] missing overlapping gene"

            if len(overlaps) < len(result):
                print "[warning] extra overlapping gene"

        # Set the record location
        # (the id must split into exactly six parts or the unpacking fails)
        dct[TableGlobals.FIRST_START_BASE_KEY], dct[TableGlobals.FIRST_END_BASE_KEY], \
            dct[TableGlobals.FIRST_STRAND_KEY], dct[TableGlobals.SECOND_START_BASE_KEY], \
            dct[TableGlobals.SECOND_END_BASE_KEY], dct[TableGlobals.SECOND_STRAND_KEY] = id.split(Table.ID_DELIMITER)

        # Assume unknown strand
        dct[TableGlobals.FIRST_STRAND_KEY] = "none"
        dct[TableGlobals.SECOND_STRAND_KEY] = "none"

        # NOTE(review): result[0] is read unguarded; an empty result would
        # raise IndexError here - confirm is_overlaps never returns one.
        is_valid = result[0][0]
        overlap_names = info[overlap_field].split(overlap_delimiter)

        # Check if major in overlapping names
        # (substring match of a found gene's name inside a listed name)
        if not is_valid:
            is_valid = False

            # print "major names", [gene[1][1]["name"] for gene in result]

            for first in [gene[1][1]["name"] for gene in result]:
                for second in overlap_names:
                    if first in second:
                        is_valid = True
                        break
                if is_valid:
                    break

        # check if minor in overlapping names
        # (same substring match, using the found genes' "other_names")
        if not is_valid:
            is_valid = False

            other_names = []

            for gene in result:
                other_names.extend(gene[1][1]["other_names"])

            # print "old names", other_names

            for first in other_names:
                for second in overlap_names:
                    if first in second:
                        is_valid = True
                        break
                if is_valid:
                    break

        # Intergenic records are never treated as overlapping; their strand
        # is set to the opposite of a representing adjacent gene's strand.
        if "intergenic" == info["category"]:
            is_valid = False
            adjacent_genes = [val for val in info[adjacent_field].split(overlap_delimiter) if val != ""]

            # NOTE(review): two adjacent genes are assumed; fewer would raise
            # IndexError on the lookups below.
            first = gene_table.findByName(adjacent_genes[0])
            second = gene_table.findByName(adjacent_genes[1])

            # Fall back to the alternative names when the lookup by name
            # returned the (None, None) "not found" value.
            if first == (None, None):
                first = gene_table.findByOtherNames(adjacent_genes[0])
            if second == (None, None):
                second = gene_table.findByOtherNames(adjacent_genes[1])

            # The first adjacent gene is preferred as the representative.
            if first != (None, None):
                representing = first
            elif second != (None, None):
                representing = second
            else:
                # NOTE(review): BaseException is not caught by
                # ``except Exception``; a narrower exception type may be
                # intended.
                raise BaseException("No presenting gene found")

            strand = representing[0].split(Table.ID_DELIMITER)[2]

            if strand == TableGlobals.STRAND_POSITIVE:
                dct[TableGlobals.FIRST_STRAND_KEY] = TableGlobals.STRAND_NEGATIVE
                dct[TableGlobals.SECOND_STRAND_KEY] = TableGlobals.STRAND_NEGATIVE

            else:
                dct[TableGlobals.FIRST_STRAND_KEY] = TableGlobals.STRAND_POSITIVE
                dct[TableGlobals.SECOND_STRAND_KEY] = TableGlobals.STRAND_POSITIVE

        # Divergent and convergent categories are excluded from the
        # overlapping naming as well.
        if is_valid:
            is_valid = "divergent" not in info["category"] and \
                       "convergent" not in info["category"]


        # Update the record name if gene was found
        if is_valid:
            # Prefix only; the gene names are appended further down.
            info["name"] = "overlapping_"
        else:
            # Fallback name: category plus start, end and the strand chosen
            # so far.
            name_id = id.split(Table.ID_DELIMITER)[:2]
            name_id.append(dct[TableGlobals.FIRST_STRAND_KEY])
            info["name"] = "lybecker_%s_%s" % (info["category"], Table.ID_DELIMITER.join(name_id))

        pos_count = 0
        neg_count = 0

        if is_valid:

            # for each gene match
            for entry in result:

                # exact match add name
                info["name"] += "%s." % entry[1][1]["name"]
                strand = entry[1][0].split(Table.ID_DELIMITER)[2]

                if TableGlobals.STRAND_NEGATIVE == strand:
                    neg_count += 1

                if TableGlobals.STRAND_POSITIVE == strand:
                    pos_count += 1

            # No found gene on the negative strand: the record gets the
            # negative strand, and vice versa. With genes on both strands
            # the strand stays "none" and a message is printed.
            if neg_count == 0:
                dct[TableGlobals.FIRST_STRAND_KEY] = TableGlobals.STRAND_NEGATIVE
                dct[TableGlobals.SECOND_STRAND_KEY] = TableGlobals.STRAND_NEGATIVE

            elif pos_count == 0:
                dct[TableGlobals.FIRST_STRAND_KEY] = TableGlobals.STRAND_POSITIVE
                dct[TableGlobals.SECOND_STRAND_KEY] = TableGlobals.STRAND_POSITIVE
            else:
                # ``entry`` is the last gene visited by the loop above.
                print "no strand match: %s" % entry[1][1]["name"]

        # remove extra . from name if match found
        if is_valid:
            info["name"] = info["name"][:-1]

        # Remaining input fields (including the new name) join the location
        # and strand fields; on a key clash the input field wins.
        dct.update(info)

        new_table_raw.append(dct)

        if show_warnings:
            print 20 * "*"

    # for row in new_table_raw:
    #     print row

    TableLoader().createTable("updated_lybecker", new_table_raw).dump("lybecker/final/updated_%s" % file_name)
Beispiel #11
0
def generate_zhang_stats():
    """Write ``zhang_stats.csv`` combining our RNA2 percentages with Zhang data.

    Rows are loaded from ``output/table_s6_range_0.csv``. For each row the
    output contains the name, the three "as RNA2" percentages (a ``"-"``
    placeholder becomes an empty cell), the six values returned by
    ``get_zhang_stats_by_name`` and the average of the percentages that are
    present. The file is tab separated and starts with a header line.

    Compared to the earlier behaviour:

    * a row whose three percentages are all ``"-"`` gets an empty average
      instead of raising ``ZeroDivisionError``;
    * only a cell that is exactly ``"-"`` is blanked, so hyphens inside real
      values (e.g. ``1e-05``) are no longer stripped;
    * a name with more than one match is still reported, but its six stats
      cells are now left empty so the columns stay aligned;
    * the output file is closed even when writing fails.
    """
    percent_fields = [
        "signif_chimeras_of_iron_limitation_cl.as_rna2_percentage",
        "signif_chimeras_of_stationary_cl.as_rna2_percentage",
        "signif_chimeras_of_log_phase_cl.as_rna2_percentage",
    ]

    header = [
        "name",
        "il_rna2_percent",
        "stat_rna2_percent",
        "log_rna2_percent",
        "k31_ip",  # distal
        "r16a_ip",  # rim
        "q8a_ip",  # proximal
        "k31",
        "r16a",
        "q8a",
        "average_percent",
    ]

    loader = TableLoader()
    rows_as_dictionary = loader.load("output/table_s6_range_0.csv")

    row_list = []

    for row in rows_as_dictionary:
        percents = [row[field] for field in percent_fields]

        new_row = [row["name"]]
        new_row.extend("" if value == "-" else value for value in percents)

        matches = get_zhang_stats_by_name(row["name"])

        if len(matches) == 1:
            new_row.extend(matches[0])
        else:
            if len(matches) > 1:
                print("warning too many results")
            # Keep the six stats columns in place when there is no single
            # unambiguous match.
            new_row.extend([""] * 6)

        present = [float(value) for value in percents if value != "-"]

        if present:
            new_row.append(sum(present) / len(present))
        else:
            new_row.append("")

        row_list.append(new_row)

    with open("zhang_stats.csv", "wb") as fl:
        fl.write("%s\n" % "\t".join(header))

        for row in row_list:
            fl.write("%s\n" % "\t".join(str(val) for val in row))
Beispiel #12
0
def generate_zhang_stats():
    """Build the tab-separated ``zhang_stats.csv`` summary file.

    Input rows come from ``output/table_s6_range_0.csv``. Each output row
    holds the name, the iron-limitation / stationary / log-phase "as RNA2"
    percentages, six values from ``get_zhang_stats_by_name`` and the average
    of the available percentages.

    Behaviour that differs from the previous implementation:

    * when none of the three percentages is available the average cell is
      left empty rather than dividing by zero;
    * a percentage cell is blanked only when it is exactly ``"-"``; hyphens
      that are part of a value (such as ``1e-05``) are preserved;
    * when several matches are returned, a warning is printed and the six
      stats cells are left empty so later columns do not shift;
    * the output file is opened in a ``with`` block and therefore always
      closed.
    """
    iron_field = "signif_chimeras_of_iron_limitation_cl.as_rna2_percentage"
    stationary_field = "signif_chimeras_of_stationary_cl.as_rna2_percentage"
    log_field = "signif_chimeras_of_log_phase_cl.as_rna2_percentage"

    header = [
        "name",
        "il_rna2_percent",
        "stat_rna2_percent",
        "log_rna2_percent",
        "k31_ip",  # distal
        "r16a_ip",  # rim
        "q8a_ip",  # proximal
        "k31",
        "r16a",
        "q8a",
        "average_percent"
    ]

    loader = TableLoader()

    row_list = []

    for row in loader.load("output/table_s6_range_0.csv"):
        percents = [row[iron_field], row[stationary_field], row[log_field]]

        new_row = [row["name"]]
        for value in percents:
            new_row.append("" if value == "-" else value)

        matches = get_zhang_stats_by_name(row["name"])

        if len(matches) > 1:
            print("warning too many results")

        if len(matches) == 1:
            new_row.extend(matches[0])
        else:
            # No match, or an ambiguous one: leave the six cells empty.
            new_row.extend([""] * 6)

        total = 0.0
        fields = 0
        for value in percents:
            if value != "-":
                total += float(value)
                fields += 1

        # An average only exists when at least one percentage is present.
        new_row.append(total / fields if fields else "")

        row_list.append(new_row)

    with open("zhang_stats.csv", "wb") as fl:
        fl.write("%s\n" % "\t".join(header))

        for row in row_list:
            fl.write("%s\n" % "\t".join(str(val) for val in row))
Beispiel #13
0
def _collect_table_hits(row, columns, short_name):
    """Return the short labels of the tables marked "+" in *row*.

    Args:
        row: Mapping from column name to cell text.
        columns: Iterable of ``(column_name, table_name)`` pairs to inspect.
        short_name: Mapping from table name to its short label.

    A cell equal to ``"-"`` is skipped. Any other value is reported on
    stdout and otherwise ignored.
    """
    hits = []

    for column, table in columns:
        cell = row[column]

        if cell == "+":
            hits.append(short_name[table])
        elif cell != "-":
            print("[warning] invalid value for cell")

    return hits


def format_final_table(path, our_tables, output_file):
    """Reformat the merged results table into the final, publication layout.

    The table at ``path`` is loaded with ``TableLoader``. For every row the
    output holds: name, EcoCyc id and type; target counts; the total number
    of interactions and the fraction as RNA2 per condition; motif related
    fields; one column per article listing the short labels of that
    article's tables in which the row was found; and the number of articles
    with at least one hit. Names, ids and types are then translated to their
    display form and the result is written tab separated to ``output_file``.

    The per-article columns were previously produced by three copies of the
    same loop; they now share ``_collect_table_hits`` and one list that
    drives both the header and the rows.

    Args:
        path: Path of the merged results table.
        our_tables: Passed to ``get_name_dictionary`` and
            ``get_ecocyc_id_dictionary`` to translate names and ids.
        output_file: Path of the file to write.
    """
    # Articles whose tables have one hit column per condition.
    sets = {
        "Raghavan et al 2011":
        ["raghavan_s5", "raghavan_s6", "raghavan_s7", "raghavan_2"],
        "Lybecker et al 2014": ["lybecker_s1", "lybecker_s2"],
        "Bilusic et al 2014": [
            "bilusic_s1", "bilusic_s2", "bilusic_s3_1", "bilusic_s3_2",
            "bilusic_s4_1", "bilusic_s4_2"
        ]
    }

    # Articles whose tables have a single hit column named after the table.
    thomason_set = {
        "Thomason et al 2015": [
            "thomason", "thomason_primary", "thomason_secondary",
            "thomason_internal", "thomason_antisense",
            "thomason_putative_asrna"
        ]
    }
    mcdowell_set = {"McDowall et al 2014": ["mcdowell"]}

    conditions = [
        "signif_chimeras_of_iron_limitation_cl",
        "signif_chimeras_of_log_phase_cl", "signif_chimeras_of_stationary_cl"
    ]

    conditions_beauty_names = ["Iron limitation", "Log", "Stationary"]

    short_name = {
        "raghavan_s5": "R1",
        "raghavan_s6": "R2",
        "raghavan_s7": "R3",
        "raghavan_2": "R4",
        "lybecker_s1": "L1",
        "lybecker_s2": "L2",
        "bilusic_s1": "B1",
        "bilusic_s2": "B2",
        "bilusic_s3_1": "B3_1",
        "bilusic_s3_2": "B3_2",
        "bilusic_s4_1": "B4_1",
        "bilusic_s4_2": "B4_2",
        "thomason": "T1",
        "thomason_primary": "T1_1",
        "thomason_secondary": "T1_2",
        "thomason_internal": "T1_3",
        "thomason_antisense": "T1_4",
        "thomason_putative_asrna": "T1_5",
        "mcdowell": "M1"
    }

    beauty_type_names = {
        "3utr": "3UTR",
        "5utr": "5UTR",
        "as": "AS",
        "cis_as_with_trans_t": "cASt",
        "igr": "IGR",
        "mrna": "CDS",
        "other-ncrna": "oRNA",
        "srna": "sRNA",
        "trna": "tRNA",
        "tu": "IGT"
    }

    loader = TableLoader()
    results = loader.load(path)

    total_fields = ["TNR %s" % cond for cond in conditions_beauty_names]
    fraction_fields = [
        "Fraction as RNA2 %s" % cond for cond in conditions_beauty_names
    ]

    # One (article name, [(column, table), ...]) item per article column, in
    # header order: per-condition articles, then Thomason, then McDowall.
    article_columns = []

    for set_name in sets:
        article_columns.append(
            (set_name, [("%s_%s" % (table, cond), table)
                        for cond in conditions
                        for table in sets[set_name]]))

    for single_column_set in (thomason_set, mcdowell_set):
        for set_name in single_column_set:
            article_columns.append(
                (set_name, [(table, table)
                            for table in single_column_set[set_name]]))

    header = [
        "Name", "EcoCyc id", "Type", "Total UI", "sRNA UI", "CDS & 5'UTR UI",
        "3'UTR & IGR UI"
    ]
    header.extend(total_fields)
    header.extend(fraction_fields)
    header.extend([
        "Longest U tract", "MEME E-value", "MAST P-value", "Meme motif",
        "Total number of targets", "Number of targets with motif",
        "Overlaps known binding site"
    ])
    header.extend(set_name for set_name, _ in article_columns)
    header.append("# of supporting papers")

    final_rows = []

    # Go over the rows and fill according to the header
    for row in results:
        row_values = [
            row["name"], row["ecocyc_id"], row["type"], row["total_targets"],
            row["tb_srna_targets"], row["mrna_5utr_targets"],
            row["igr_3utr_targets"]
        ]

        # Total interactions
        for field in total_fields:
            cond_name = get_condition_by_header_name(field, conditions,
                                                     conditions_beauty_names)

            first_count = int(row["%s_first_interactions" % cond_name])
            second_count = int(row["%s_second_interactions" % cond_name])

            row_values.append(first_count + second_count)

        # Interactions percentage; "-" when there are no interactions
        for field in fraction_fields:
            cond_name = get_condition_by_header_name(field, conditions,
                                                     conditions_beauty_names)

            first_count = float(row["%s_first_interactions" % cond_name])
            second_count = float(row["%s_second_interactions" % cond_name])
            total_count = first_count + second_count

            if total_count == 0:
                row_values.append("-")
            else:
                row_values.append("%.2f" % (second_count / total_count))

        row_values.extend([
            row["max_poly_u_length"], row["meme"].upper(), row["mast"].upper(),
            row["motif"], row["total_number_of_targets"],
            row["number_of_targets_with_motif"], row["binding_site_state"]
        ])

        total_articles = 0

        # Merge the hit columns of each article into a single cell
        for _, columns in article_columns:
            hits = _collect_table_hits(row, columns, short_name)

            if hits:
                total_articles += 1

            # A table hit under several conditions is listed once.
            row_values.append(";".join(set(hits)))

        row_values.append(total_articles)

        final_rows.append(row_values)

    # go over the rows and fix name, id, type notations
    names = get_name_dictionary(our_tables)
    ecocyc_ids = get_ecocyc_id_dictionary(our_tables)

    for row in final_rows:
        row[0] = names[row[0]]
        row[1] = ecocyc_ids[row[1]]
        row[2] = beauty_type_names[row[2]]

        # Drop a trailing all-digit component from the id.
        if row[1].split(".")[-1].isdigit():
            row[1] = ".".join(row[1].split(".")[:-1])

        row[0] = row[0].replace(".TU", ".IGT")
        row[1] = row[1].replace(".TU", ".IGT")

    with open(output_file, "wb") as fl:

        fl.write("%s\n" % "\t".join(header))

        for row in final_rows:
            fl.write("%s\n" % "\t".join(str(val) for val in row))