def orfQuant_OPM(gene, sqlite_path_organism, sqlite_path_reads, supported, counts, filter=True):
    """Compute ORFs-per-million (OPM) values for the supported transcripts of a gene.

    Args:
        gene: gene identifier to quantify.
        sqlite_path_organism: path to the organism annotation sqlite file.
        sqlite_path_reads: list of read (alignment) sqlite file paths.
        supported: transcript ids of this gene to quantify over.
        counts: per-transcript read counts over unique exon regions
            (e.g. from count_read_supporting_regions_per_transcript).
        filter: forwarded to junction scoring to filter unsupported transcripts.
            (Name shadows the builtin, kept for caller compatibility.)

    Returns:
        Dict mapping transcript id -> ORFs-per-million value.
    """
    # Genomic features (exons, junctions, ORF coordinates) for all transcripts.
    exons = genomic_exon_coordinate_ranges(gene, sqlite_path_organism, supported)
    junctions = genomic_junction_positions(gene, sqlite_path_organism, supported)
    orf_coordinates = genomic_orf_coordinate_ranges(gene, sqlite_path_organism, supported)
    orf_junctions = get_orf_coordinate_junctions(orf_coordinates)
    # NOTE(review): return value discarded — presumably called for side effects; confirm.
    features_per_orf(orf_coordinates, orf_junctions, exons, junctions)
    # BUG FIX: was hard-coded filter=True, silently ignoring the `filter` argument.
    junction_scores = genomic_junction_scores(gene, sqlite_path_organism, sqlite_path_reads, supported, filter=filter)
    # Split features into those unique to one transcript vs shared by several.
    unique_shared_exons = classify_regions_shared_unique(exons)
    unique_shared_junctions = classify_regions_shared_unique(junctions)
    coverage_exons = region_coverage(exons, counts)
    coverage_junctions = coverage_junction_transcript(junction_scores)
    # NOTE(review): result discarded — kept in case shared_coverage mutates its
    # arguments; confirm and remove if it is pure.
    shared_coverage(coverage_exons, coverage_junctions)
    average_unique = average_unique_coverage(unique_shared_exons, unique_shared_junctions, coverage_junctions, coverage_exons)
    average_all = all_feature_average(coverage_junctions, coverage_exons)
    # Baseline per-transcript ratio of unique-feature to all-feature coverage.
    cORF = cORF_ratio(average_unique, average_all)
    all_shared = True
    for transcript in supported:
        if (unique_shared_junctions[transcript]["unique"] == []
                and unique_shared_exons[transcript]["unique"] == []):
            # Transcript has no unique exon or junction: fall back to an
            # adjusted coverage estimate instead of the (undefined) unique ratio.
            cORF[transcript] = adjusted_coverage_for_non_unique_orfs(
                transcript, coverage_exons, coverage_junctions, average_all)
        else:
            all_shared = False
    if all_shared:
        # No transcript has any unique feature: use shared coverage wholesale.
        cORF = shared_coverage(coverage_exons, coverage_junctions)
    # BUG FIX: the former `else: cORF = cORF_ratio(average_unique, average_all)`
    # recomputed the ratio wholesale, discarding the per-transcript adjustments
    # made in the loop above; cORF already holds the correct mixed values here.
    adjusted_a_sites = aORF(cORF, counts)
    orf_lengths = lORF(supported, sqlite_path_organism)
    orfs_per_million = ORFs_per_million(adjusted_a_sites, orf_lengths)
    return orfs_per_million
def rna_seq_read_counting(gene, sqlite_path_organism, sqlite_path_reads, exclude=True, count_type="range"):
    """Count RNA-seq reads supporting each transcript of a gene and report
    which transcripts are required to explain the observed reads.

    Args:
        gene: gene identifier.
        sqlite_path_organism: path to the organism annotation sqlite file.
        sqlite_path_reads: list of read (alignment) sqlite file paths.
        exclude: if True, restrict to transcripts supported by the reads;
            also forwarded as `filter` to the scoring/read-fetch helpers.
        count_type: which part of each read to count against features —
            one of "range", "fiveprime" or "asite".

    Returns:
        List of transcript ids needed to explain the reads, or the string
        "ERROR" for an invalid count_type (kept for caller compatibility).
    """
    if exclude:
        supported = filter_unsupported_transcripts(gene, sqlite_path_organism, sqlite_path_reads)
    else:
        gene_info = get_gene_info(gene, sqlite_path_organism)
        supported = [i[0] for i in gene_info]
    genomic_exon_coordinates = genomic_exon_coordinate_ranges(gene, sqlite_path_organism, supported)
    unique_regions = get_unique_regions(genomic_exon_coordinates)
    all_junctions, unique_junctions = unique_exon_junctions(genomic_exon_coordinates)
    junction_scores = get_scores_per_exonjunction_for_gene(gene, sqlite_path_organism, sqlite_path_reads, supported, filter=exclude)
    # Transcripts already required to explain observed exon junctions.
    transcripts_to_explain_reads = explain_exon_junctions(junction_scores, all_junctions, unique_junctions)
    # Membership test instead of the former try/except around list.index().
    if count_type not in ("range", "fiveprime", "asite"):
        print("The count type must be one of 'range', 'fiveprime' or 'asite'. "
              "count_type refers to the part of the read that is used in the feature counting process")
        return "ERROR"
    if count_type == "range":
        genomic_read_ranges = get_read_ranges_genomic_location(gene, sqlite_path_reads, sqlite_path_organism, supported, filter=exclude)
        counts = count_readranges_supporting_exons_per_transcript(unique_regions, genomic_read_ranges)
    elif count_type == "fiveprime":
        genomic_read_positions = get_reads_per_genomic_location_fiveprime(gene, sqlite_path_reads, sqlite_path_organism, supported, filter=exclude)
        counts = count_read_supporting_regions_per_transcript(unique_regions, genomic_read_positions)
    elif count_type == "asite":
        genomic_read_positions = get_reads_per_genomic_location_asite(gene, sqlite_path_reads, sqlite_path_organism, supported, filter=exclude)
        counts = count_read_supporting_regions_per_transcript(unique_regions, genomic_read_positions)
    sum_of_exon_counts = {}
    maximum_sum = 0
    # BUG FIX: maximum_transcript was unbound (NameError at the print below)
    # when counts was empty or every transcript summed to zero.
    maximum_transcript = None
    for transcript in counts:
        transcript_sum = sum(counts[transcript])
        # BUG FIX: the dedup check compared the raw key while str(transcript)
        # is what gets appended; compare the same representation.
        if (transcript_sum > 0) and (str(transcript) not in transcripts_to_explain_reads):
            transcripts_to_explain_reads.append(str(transcript))
        sum_of_exon_counts[transcript] = transcript_sum
        if transcript_sum > maximum_sum:
            maximum_sum = transcript_sum
            maximum_transcript = transcript
    if maximum_transcript is not None:
        print("The transcript with most uniquely mapped reads is {maximum_transcript} with a score of {maximum_sum}".format(
            maximum_transcript=str(maximum_transcript), maximum_sum=maximum_sum))
    return transcripts_to_explain_reads
from tripsSplicepy2 import get_protein_coding_transcript_ids
from tripsSplicepy2 import get_reads_per_genomic_location_asite
from tripsCountpy2 import ribo_seq_read_counting
from tripsCountpy2 import ribo_seq_read_counting_raw

# Ad-hoc benchmark driver: times orfQuant_OPM on one sample gene / read file.
# NOTE(review): `time`, genomic_exon_coordinate_ranges,
# count_read_supporting_regions_per_transcript and orfQuant_OPM are not
# imported above — presumably defined or imported elsewhere in this file;
# verify before running this script standalone.
if __name__ == "__main__":
    start = time.time()
    gene = "phpt1"
    # Local annotation and read databases used for the benchmark.
    sqlite_path_organism = "homo_sapiens.v2.sqlite"
    sqlite_path_reads = ["SRR2433794.sqlite"]
    coding = get_protein_coding_transcript_ids(gene, sqlite_path_organism)
    # A-site-mapped read positions per genomic location for coding transcripts.
    genomic_read_positions = get_reads_per_genomic_location_asite(
        gene, sqlite_path_reads, sqlite_path_organism, coding, filter=True)
    # genomic_read_positions = get_reads_per_genomic_location_fiveprime(gene, sqlite_path_reads, sqlite_path_organism, coding, filter=True)
    exons = genomic_exon_coordinate_ranges(gene, sqlite_path_organism, coding)
    counts = count_read_supporting_regions_per_transcript(
        exons, genomic_read_positions)
    orfQuant_res = orfQuant_OPM(gene, sqlite_path_organism, sqlite_path_reads,
                                coding, counts, filter=True)
    end = time.time()
    print("ORFquant OPM time: " + str(end - start))
    # Second timing run begins here; its continuation is outside this view.
    start = time.time()
    gene = "phpt1"
def query():
    """Flask endpoint body: parse a plot request and return either rendered
    plot output or a "TRANSCRIPTS:..." listing string.

    Reads parameters from `request.data` (a python-literal dict), resolves
    the organism annotation sqlite, resolves the entered name as a transcript
    id (or a gene, possibly with several transcripts), applies per-user
    display settings, and delegates plotting to riboflask.generate_plot.

    NOTE(review): every SQL statement below is built with str.format on
    request-supplied values (tran, organism, transcriptome) — SQL injection
    risk; these should be parameterized (cursor.execute(..., (param,))).
    """
    global user_short_passed
    tran_dict = {}   # NOTE(review): unused in the visible code
    gene_dict = {}   # NOTE(review): unused in the visible code
    ribo_user_files = {}   # NOTE(review): unused in the visible code
    # Request payload arrives as a python-literal dict string.
    data = ast.literal_eval(request.data)
    tran = data['transcript'].upper().strip()
    readscore = data['readscore']
    secondary_readscore = data['secondary_readscore']
    minread = int(data['minread'])
    maxread = int(data['maxread'])
    minfiles = int(data['minfiles'])
    organism = data['organism']
    seqhili = data['seqhili'].split(",")
    hili_start = int(data['hili_start'])
    hili_stop = int(data['hili_stop'])
    transcriptome = data['transcriptome']
    advanced = data["advanced"]
    # Send file_list (a list of integers intentionally encoded as strings due
    # to javascript), to be converted to a dictionary with riboseq/rnaseq
    # lists of file paths.
    file_paths_dict = fetch_file_paths(data["file_list"], organism)
    primetype = data["primetype"]
    user_hili_starts = data["user_hili_starts"]
    user_hili_stops = data["user_hili_stops"]
    user_short = data["user_short"]
    # Look up the transcriptome's owner to locate its annotation sqlite file.
    connection = sqlite3.connect('{}/trips.sqlite'.format(config.SCRIPT_LOC))
    connection.text_factory = str
    cursor = connection.cursor()
    cursor.execute("SELECT owner FROM organisms WHERE organism_name = '{}' and transcriptome_list = '{}';".format(organism, transcriptome))
    owner = (cursor.fetchone())[0]
    if owner == 1:
        # Owner 1: site-provided annotation under ANNOTATION_DIR.
        if os.path.isfile("{0}{1}/{1}.{2}.sqlite".format(config.ANNOTATION_DIR, organism, transcriptome)):
            sqlite_path_organism = "{0}{1}/{1}.{2}.sqlite".format(config.ANNOTATION_DIR, organism, transcriptome)
            transhelve = sqlite3.connect("{0}{1}/{1}.{2}.sqlite".format(config.ANNOTATION_DIR, organism, transcriptome))
        else:
            return "Cannot find annotation file {}.{}.sqlite".format(organism, transcriptome)
    else:
        # Otherwise a user-uploaded transcriptome under UPLOADS_DIR.
        sqlite_path_organism = "{0}transcriptomes/{1}/{2}/{3}/{2}_{3}.v2.sqlite".format(config.UPLOADS_DIR, owner, organism, transcriptome)
        transhelve = sqlite3.connect("{0}transcriptomes/{1}/{2}/{3}/{2}_{3}.v2.sqlite".format(config.UPLOADS_DIR, owner, organism, transcriptome))
    cursor = transhelve.cursor()
    # First try the entered name as a transcript id.
    cursor.execute("SELECT * from transcripts WHERE transcript = '{}'".format(tran))
    result = cursor.fetchone()
    inputtran = True
    if result != None:
        newtran = result[0]  # NOTE(review): newtran is never used afterwards
    else:
        inputtran = False
    if inputtran == False:
        # Not a transcript id — try it as a gene name instead.
        cursor.execute("SELECT * from transcripts WHERE gene = '{}'".format(tran))
        result = cursor.fetchall()
        if result != []:
            if len(result) == 1:
                # Exactly one transcript for this gene: plot it directly below.
                tran = str(result[0][0])
            else:
                # Several transcripts: return a "TRANSCRIPTS:..." listing,
                # annotated with per-transcript ORF coverage, for the client
                # to present as a choice.
                return_str = "TRANSCRIPTS"
                f = open("logfile.txt", "w")  # NOTE(review): opened but never written to
                coding = get_protein_coding_transcript_ids(tran, sqlite_path_organism)
                genomic_read_positions = get_reads_per_genomic_location_asite(tran, file_paths_dict["riboseq"].values(), sqlite_path_organism, coding, filter=True)
                exons = genomic_exon_coordinate_ranges(tran, sqlite_path_organism, coding)
                counts = count_read_supporting_regions_per_transcript(exons, genomic_read_positions)
                orfQuant_res = orfQuant(tran, sqlite_path_organism, file_paths_dict["riboseq"].values(), coding, counts, filter=True)
                f.close()
                for transcript in result:
                    cursor.execute("SELECT length,cds_start,cds_stop,principal,version from transcripts WHERE transcript = '{}'".format(transcript[0]))
                    tran_result = cursor.fetchone()
                    tranlen = tran_result[0]
                    cds_start = tran_result[1]
                    cds_stop = tran_result[2]
                    if str(tran_result[3]) == "1":
                        principal = "principal"
                    else:
                        principal = ""
                    version = tran_result[4]
                    # cds_start may be stored as the string "NULL" or SQL NULL.
                    if cds_start == "NULL" or cds_start == None:
                        cdslen = "NULL"
                        threeutrlen = "NULL"
                    else:
                        cdslen = cds_stop-cds_start
                        threeutrlen = tranlen - cds_stop
                    if transcript[0] in orfQuant_res:
                        coverage = orfQuant_res[transcript[0]]
                    else:
                        coverage = "NULL"
                    return_str += (":{},{},{},{},{},{},{}".format(transcript[0], version, tranlen, cds_start, cdslen, threeutrlen, coverage))
                return return_str
        else:
            return "ERROR! Could not find any transcript corresponding to {}".format(tran)
    transhelve.close()
    # Presence-style checkbox flags: key present in the payload == enabled.
    if 'varlite' in data:
        lite = "y"
    else:
        lite = "n"
    if 'preprocess' in data:
        preprocess = True
    else:
        preprocess = False
    if 'uga_diff' in data:
        uga_diff = True
    else:
        uga_diff = False
    if 'color_readlen_dist' in data:
        color_readlen_dist = True
    else:
        color_readlen_dist = False
    if 'ribocoverage' in data:
        ribocoverage = True
    else:
        ribocoverage = False
    if "nucseq" in data:
        nucseq = True
    else:
        nucseq = False
    if "mismatches" in data:
        mismatches = True
    else:
        mismatches = False
    if "ambiguous" in data:
        ambiguous = "ambig"
    else:
        ambiguous = "unambig"
    if "pcr" in data:
        pcr = True
    else:
        pcr = False
    if "noisered" in data:
        noisered = True
    else:
        noisered = False
    if "mismatch" in data:
        mismatch = True
    else:
        mismatch = False
    # Reuse a user-supplied short URL code once; otherwise generate one.
    if data["user_short"] == "None" or user_short_passed == True:
        short_code = generate_short_code(data, organism, data["transcriptome"], "interactive_plot")
    else:
        short_code = data["user_short"]
        user_short_passed = True
    try:
        user = current_user.name
    except:
        user = None  # not logged in (or no Flask-Login context)
    connection = sqlite3.connect('{}/trips.sqlite'.format(config.SCRIPT_LOC))
    connection.text_factory = str
    cursor = connection.cursor()
    # Site-default display settings; overridden per-user below when logged in.
    background_col = config.BACKGROUND_COL
    uga_col = config.UGA_COL
    uag_col = config.UAG_COL
    uaa_col = config.UAA_COL
    title_size = config.TITLE_SIZE
    subheading_size = config.SUBHEADING_SIZE
    axis_label_size = config.AXIS_LABEL_SIZE
    marker_size = config.MARKER_SIZE
    cds_marker_size = config.CDS_MARKER_SIZE
    cds_marker_colour = config.CDS_MARKER_COLOUR
    legend_size = config.LEGEND_SIZE
    ribo_linewidth = config.RIBO_LINEWIDTH
    # Put any publicly available seq types (apart from riboseq and rnaseq) here
    seq_rules = {"proteomics": {"frame_breakdown": 1}, "conservation": {"frame_breakdown": 1}, "tcpseq": {"frame_breakdown": 0}}
    # get user_id
    if user != None:
        # NOTE(review): the '******' literal looks like a redacted placeholder —
        # .format(user) has no {} to fill, so as written this query cannot
        # match the current user. Confirm against the original source.
        cursor.execute("SELECT user_id from users WHERE username = '******';".format(user))
        result = (cursor.fetchone())
        user_id = result[0]
        # get a list of organism id's this user can access
        cursor.execute("SELECT background_col,uga_col,uag_col,uaa_col,title_size,subheading_size,axis_label_size,marker_size,cds_marker_width,cds_marker_colour,legend_size,ribo_linewidth from user_settings WHERE user_id = '{}';".format(user_id))
        result = (cursor.fetchone())
        background_col = result[0]
        uga_col = result[1]
        uag_col = result[2]
        uaa_col = result[3]
        title_size = result[4]
        subheading_size = result[5]
        axis_label_size = result[6]
        marker_size = result[7]
        cds_marker_size = result[8]
        cds_marker_colour = result[9]
        legend_size = result[10]
        ribo_linewidth = result[11]
        # get rules for all custom seq types
        cursor.execute("SELECT * from seq_rules WHERE user_id = {};".format(user_id))
        result = (cursor.fetchall())
        for row in result:
            seq_name = row[1]
            frame_breakdown = row[2]
            seq_rules[seq_name] = {"frame_breakdown": frame_breakdown}
    connection.close()
    if tran != "":
        # Delegate the actual plotting to riboflask with all gathered settings.
        x = riboflask.generate_plot(tran, ambiguous, minread, maxread, lite, ribocoverage, organism, readscore, noisered, primetype, minfiles, nucseq, user_hili_starts, user_hili_stops, uga_diff, file_paths_dict, short_code, color_readlen_dist, background_col, uga_col, uag_col, uaa_col, advanced, config.ANNOTATION_DIR, seqhili, seq_rules, title_size, subheading_size, axis_label_size, marker_size, transcriptome, config.UPLOADS_DIR, cds_marker_size, cds_marker_colour, legend_size, ribo_linewidth, secondary_readscore, pcr, mismatches, hili_start, hili_stop)
    else:
        x = "ERROR! Could not find any transcript corresponding to whatever you entered"
    return x
def orfQuant_signal(gene, sqlite_path_organism, sqlite_path_reads, supported, counts, filter=True):
    """Compute each ORF's share of the gene's ribosome-profiling signal.

    Same pipeline as orfQuant_OPM, but the final step reports the percentage
    of gene signal per ORF instead of ORFs-per-million.

    Args:
        gene: gene identifier to quantify.
        sqlite_path_organism: path to the organism annotation sqlite file.
        sqlite_path_reads: list of read (alignment) sqlite file paths.
        supported: transcript ids of this gene to quantify over.
        counts: per-transcript read counts over unique exon regions.
        filter: forwarded to junction scoring to filter unsupported transcripts.
            (Name shadows the builtin, kept for caller compatibility.)

    Returns:
        Dict mapping transcript id -> percentage of gene signal for its ORF.
    """
    # Genomic features (exons, junctions, ORF coordinates) for all transcripts.
    exons = genomic_exon_coordinate_ranges(gene, sqlite_path_organism, supported)
    junctions = genomic_junction_positions(gene, sqlite_path_organism, supported)
    orf_coordinates = genomic_orf_coordinate_ranges(gene, sqlite_path_organism, supported)
    orf_junctions = get_orf_coordinate_junctions(orf_coordinates)
    # NOTE(review): return value discarded — presumably called for side effects; confirm.
    features_per_orf(orf_coordinates, orf_junctions, exons, junctions)
    # BUG FIX: was hard-coded filter=True, silently ignoring the `filter` argument.
    junction_scores = genomic_junction_scores(gene, sqlite_path_organism, sqlite_path_reads, supported, filter=filter)
    # Split features into those unique to one transcript vs shared by several.
    unique_shared_exons = classify_regions_shared_unique(exons)
    unique_shared_junctions = classify_regions_shared_unique(junctions)
    coverage_exons = region_coverage(exons, counts)
    coverage_junctions = coverage_junction_transcript(junction_scores)
    # NOTE(review): result discarded — kept in case shared_coverage mutates its
    # arguments; confirm and remove if it is pure.
    shared_coverage(coverage_exons, coverage_junctions)
    average_unique = average_unique_coverage(unique_shared_exons, unique_shared_junctions, coverage_junctions, coverage_exons)
    average_all = all_feature_average(coverage_junctions, coverage_exons)
    # Baseline per-transcript ratio of unique-feature to all-feature coverage.
    cORF = cORF_ratio(average_unique, average_all)
    all_shared = True
    for transcript in supported:
        if (unique_shared_junctions[transcript]["unique"] == []
                and unique_shared_exons[transcript]["unique"] == []):
            # Transcript has no unique exon or junction: fall back to an
            # adjusted coverage estimate instead of the (undefined) unique ratio.
            cORF[transcript] = adjusted_coverage_for_non_unique_orfs(
                transcript, coverage_exons, coverage_junctions, average_all)
        else:
            all_shared = False
    if all_shared:
        # No transcript has any unique feature: use shared coverage wholesale.
        cORF = shared_coverage(coverage_exons, coverage_junctions)
    # BUG FIX: the former `else: cORF = cORF_ratio(average_unique, average_all)`
    # recomputed the ratio wholesale, discarding the per-transcript adjustments
    # made in the loop above; cORF already holds the correct mixed values here.
    adjusted_a_sites = aORF(cORF, counts)
    orf_lengths = lORF(supported, sqlite_path_organism)
    gene_signal_per_orf = pct_gene_signal_per_orf(adjusted_a_sites)
    return gene_signal_per_orf


# if __name__ == "__main__":
#     start= time.time()
#     gene = "igf2"
#     sqlite_path_organism =
"homo_sapiens.v2.sqlite" # sqlite_path_reads = ["SRR2433794.sqlite"] # coding = get_protein_coding_transcript_ids(gene, sqlite_path_organism) # genomic_read_positions = get_reads_per_genomic_location_asite(gene, sqlite_path_reads, sqlite_path_organism, # coding, filter=True) # # genomic_read_positions = get_reads_per_genomic_location_fiveprime(gene, sqlite_path_reads, sqlite_path_organism, coding, filter=True) # exons = genomic_exon_coordinate_ranges(gene, sqlite_path_organism, coding) # # counts = count_read_supporting_regions_per_transcript(exons, genomic_read_positions) # # orfQuant_res = orfQuant_OPM(gene, sqlite_path_organism, sqlite_path_reads, coding, counts, filter=True) # end = time.time() # print(end - start) # rankings = rank_based_on_dict_values(orfQuant_res) # print rankings