def _append_count_to_matrix(qtl_matrixfile, lod_threshold):
    """ Append an extra column at the end of the matrix file containing
    for each row (marker) the number of QTLs found.

    The file is read with ``sep=','``; the first row is treated as the
    header and receives the ``# QTLs`` label. For every other row, the
    non-empty cells from the fourth column onwards are converted to
    float and counted when strictly greater than the threshold. The
    resulting matrix is handed to ``write_matrix`` under the same file
    name.

    Note that every data row gets a count, whether or not its marker
    name is empty.

    :arg qtl_matrixfile, the path of the matrix file to read and in
        which to save the output.
    :arg lod_threshold, threshold used to determine if a given LOD
        value is reflective the presence of a QTL.
    :raises MQ2Exception: if ``qtl_matrixfile`` does not exist.

    """
    if not os.path.exists(qtl_matrixfile):  # pragma: no cover
        raise MQ2Exception('File not found: "%s"' % qtl_matrixfile)

    matrix = read_input_file(qtl_matrixfile, sep=',')
    if not matrix:  # pragma: no cover
        # No header row to label and no marker to count: nothing to do.
        return

    # Convert the threshold once rather than once per cell.
    threshold = float(lod_threshold)

    output = [list(matrix[0]) + ['# QTLs']]
    for row in matrix[1:]:
        row = list(row)
        # Columns 0-2 are skipped; empty cells are ignored.
        nr_qtl = sum(
            1 for cel in row[3:] if cel and float(cel) > threshold)
        row.append(str(nr_qtl))
        output.append(row)

    write_matrix(qtl_matrixfile, output)
def add_marker_to_qtls(qtlfile, mapfile, outputfile='qtls_with_mk.csv'):
    """This function adds to a list of QTLs, the closest marker to the
    QTL peak.

    Both files are read with ``,`` as separator. The first row of the
    QTL file is treated as the header and gets a ``Closest marker``
    column; every other row gets the value returned by
    ``add_marker_to_qtl`` for that row. Nothing is written if either
    input is empty.

    :arg qtlfile: a CSV list of all the QTLs found.
        The file should be structured as follow::

            Trait, Linkage group, position, other columns

        The other columns will not matter as long as the first three
        columns are as such.
    :arg mapfile: a CSV representation of the map used for the QTL
        mapping analysis.
        The file should be structured as follow::

            Marker, Linkage group, position

    :kwarg outputfile: the name of the output file in which the list of
        QTLs with their closest marker will be written.

    """
    qtl_list = read_input_file(qtlfile, ',')
    map_list = read_input_file(mapfile, ',')
    if not qtl_list or not map_list:  # pragma: no cover
        return

    header = qtl_list[0]
    header.append('Closest marker')
    qtls = [header]
    for qtl in qtl_list[1:]:
        qtl.append(add_marker_to_qtl(qtl, map_list))
        qtls.append(qtl)

    # The header row is not a QTL: leave it out of the reported count.
    LOG.info('- %s QTLs processed in %s' % (len(qtls) - 1, qtlfile))
    write_matrix(outputfile, qtls)
def append_flanking_markers(qtls_mk_file, flanking_markers):
    """ Append the flanking markers extracted in the process of
    generating the MapChart to the QTL list file.

    The file is read with ``sep=','``. The first row receives the two
    column titles; every other row is extended with the entry of
    ``flanking_markers`` found under the value of its fourth column, or
    with ``NA``, ``NA`` when there is no such entry. The result is
    handed to ``write_matrix`` under the same file name.

    :arg qtls_mk_file: the path of the QTL list file to extend.
    :arg flanking_markers: a mapping from the value of the fourth
        column to the cells to append to that row.

    """
    titles = ['LOD2 interval start', 'LOD2 interval end']
    annotated = []
    for index, line in enumerate(read_input_file(qtls_mk_file, sep=',')):
        if index == 0:
            extra = titles
        else:
            key = line[3]
            if key in flanking_markers:
                extra = flanking_markers[key]
            else:
                extra = ['NA', 'NA']
        line.extend(extra)
        annotated.append(line)
    write_matrix(qtls_mk_file, annotated)
def add_qtl_to_map(qtlfile, mapfile, outputfile='map_with_qtls.csv'):
    """ This function adds to a genetic map for each marker the number
    of significant QTLs found.

    Both files are read with ``,`` as separator and their first row is
    treated as a header. Each marker row is passed to
    ``add_qtl_to_marker`` together with the QTL rows; the last cell of
    the returned row is read as the QTL count of that marker. Nothing
    is written if the map is empty.

    :arg qtlfile, the output from MapQTL transformed to a csv file via
        'parse_mapqtl_file' which contains the closest markers.
    :arg mapfile, the genetic map with all the markers.
    :kwarg outputfile, the name of the output file in which the map will
        be written.

    """
    qtl_list = read_input_file(qtlfile, ',')
    map_list = read_input_file(mapfile, ',')
    if not map_list:  # pragma: no cover
        # No header row and no marker: there is no map to annotate.
        return

    header = map_list[0]
    header.append('# QTLs')
    markers = [header]
    qtl_cnt = 0
    for marker in map_list[1:]:
        marker = add_qtl_to_marker(marker, qtl_list[1:])
        markers.append(marker)
        qtl_cnt += int(marker[-1])

    # The header row is not a marker: leave it out of the reported count.
    LOG.info('- %s markers processed in %s' % (len(markers) - 1, mapfile))
    LOG.info('- %s QTLs located in the map: %s' % (qtl_cnt, outputfile))
    write_matrix(outputfile, markers)
def convert_inputfiles(
    cls,
    folder=None,
    inputfile=None,
    session=None,
    lod_threshold=None,
    qtls_file="qtls.csv",
    matrix_file="qtls_matrix.csv",
    map_file="map.csv",
):
    """Convert the input files present in the given folder or
    inputfile.

    This method creates the matrix representation of the QTLs results
    providing for each marker position the LOD value found for each
    trait as well as a representation of the genetic map used in the
    experiment.
    The genetic map should be cleared of any markers added by the QTL
    mapping software.

    When both ``folder`` and ``inputfile`` are given, ``inputfile``
    takes precedence.

    :kwarg folder: the path to the folder containing the files to
        check. This folder may contain sub-folders.
    :kwarg inputfile: the path to the input file to use
    :kwarg session: the session identifier used to identify which
        session to process
    :kwarg lod_threshold: the LOD threshold to apply to determine if
        a QTL is significant or not
    :kwarg qtls_file: a csv file containing the list of all the
        significant QTLs found in the analysis.
        The matrix is of type:
           trait, linkage group, position, Marker, LOD other columns
    :kwarg matrix_file: a csv file containing a matrix representation
        of the QTL data. This matrix is of type:
           marker, linkage group, position, trait1 lod, trait2, lod
    :kwarg map_file: a csv file containing the genetic map used
        in this experiment. The map is of structure:
           marker, linkage group, position
    :raises MQ2Exception: if neither ``folder`` nor ``inputfile`` is
        given, if one of them is of the wrong kind, if no input file
        is found or if ``lod_threshold`` is not a number.
    :raises MQ2NoSessionException: if ``session`` is None.
    :raises MQ2NoSuchSessionException: if ``session`` is not one of the
        identifiers returned by ``cls.get_session_identifiers``.

    """
    if folder is None and inputfile is None:
        raise MQ2Exception("You must specify either a folder or an input file")

    sessions = cls.get_session_identifiers(folder)
    if session is None:
        raise MQ2NoSessionException(
            "The MapQTL plugin requires a session identifier to "
            "identify the session to process. "
            "Sessions are: %s" % ",".join(sessions)
        )
    elif str(session) not in sessions:
        raise MQ2NoSuchSessionException(
            "The MapQTL session provided (%s) could not be found in the "
            "dataset. \n"
            "Sessions are: %s" % (session, ",".join(sessions))
        )

    if folder is not None:
        if not os.path.isdir(folder):  # pragma: no cover
            raise MQ2Exception("The specified folder is actually not a folder")
        inputfiles = cls.get_files(folder, session_id=session)

    if inputfile is not None:  # pragma: no cover
        if os.path.isdir(inputfile):
            raise MQ2Exception("The specified input file is actually a folder")
        inputfiles = [inputfile]

    if not inputfiles:  # pragma: no cover
        # Without this guard the code below fails on an unbound `headers`.
        raise MQ2Exception("No files correspond to this plugin")

    try:
        lod_threshold = float(lod_threshold)
    except (TypeError, ValueError):
        # float(None) raises TypeError, and None is the default value.
        raise MQ2Exception("LOD threshold should be a number")

    inputfiles.sort()

    # QTL matrix and QTL files
    qtl_matrix = []
    qtls = []
    for filename in inputfiles:
        matrix = read_input_file(filename)
        headers = matrix[0]
        qtl_matrix = get_qtls_matrix(qtl_matrix, matrix, filename)
        qtls.extend(get_qtls_from_mapqtl_data(matrix, lod_threshold, filename))

    # format QTLs and write down the selection
    # (the header row used is the one of the last file read)
    headers[0] = "Trait name"
    qtls.insert(0, headers)
    write_matrix(qtls_file, qtls)

    # Write down the QTL matrix
    del qtl_matrix[0]
    # Reorganize a couple of columns: move the entry at index 2 first
    qtl_matrix.insert(0, qtl_matrix[2])
    del qtl_matrix[3]
    # write output, transposed
    qtl_matrix = list(zip(*qtl_matrix))
    write_matrix(matrix_file, qtl_matrix)

    # Map matrix
    map_matrix = get_map_matrix(inputfiles[0])
    write_matrix(map_file, map_matrix)
def convert_inputfiles(cls,
                       folder=None,
                       inputfile=None,
                       session=None,
                       lod_threshold=None,
                       qtls_file='qtls.csv',
                       matrix_file='qtls_matrix.csv',
                       map_file='map.csv'):
    """ Convert the input files present in the given folder or
    inputfile.

    This method creates the matrix representation of the QTLs results
    providing for each marker position the LOD value found for each
    trait as well as a representation of the genetic map used in the
    experiment.
    The genetic map should be cleared of any markers added by the QTL
    mapping software.

    Exactly one input file is processed. When both ``folder`` and
    ``inputfile`` are given, ``inputfile`` takes precedence.

    :kwarg folder: the path to the folder containing the files to
        check. This folder may contain sub-folders.
    :kwarg inputfile: the path to the input file to use
    :kwarg session: the session identifier used to identify which
        session to process (not used by this implementation)
    :kwarg lod_threshold: the LOD threshold to apply to determine if
        a QTL is significant or not
    :kwarg qtls_file: a csv file containing the list of all the
        significant QTLs found in the analysis.
        The matrix is of type:
           trait, linkage group, position, Marker, LOD other columns
    :kwarg matrix_file: a csv file containing a matrix representation
        of the QTL data. This matrix is of type:
           marker, linkage group, position, trait1 lod, trait2, lod
    :kwarg map_file: a csv file containing the genetic map used
        in this experiment. The map is of structure:
           marker, linkage group, position
    :raises MQ2Exception: if neither ``folder`` nor ``inputfile`` is
        given, if one of them is of the wrong kind, if there is not
        exactly one input file or if ``lod_threshold`` is not a number.

    """
    if folder is None and inputfile is None:
        raise MQ2Exception('You must specify either a folder or an '
                           'input file')

    if folder is not None:  # pragma: no cover
        if not os.path.isdir(folder):
            raise MQ2Exception('The specified folder is actually '
                               'not a folder')
        inputfiles = cls.get_files(folder)

    if inputfile is not None:  # pragma: no cover
        if os.path.isdir(inputfile):
            raise MQ2Exception('The specified input file is actually '
                               'a folder')
        inputfiles = [inputfile]

    if not inputfiles:  # pragma: no cover
        raise MQ2Exception('No files correspond to this plugin')

    if len(inputfiles) > 1:  # pragma: no cover
        raise MQ2Exception(
            'This plugin can only process one file at a time')

    try:
        lod_threshold = float(lod_threshold)
    except (TypeError, ValueError):
        # float(None) raises TypeError, and None is the default value.
        raise MQ2Exception('LOD threshold should be a number')

    inputfile = inputfiles[0]

    # QTL matrix and QTL files
    matrix = read_input_file(inputfile, sep=',', noquote=True)
    qtls = list(get_qtls_from_rqtl_data(matrix, lod_threshold))
    # format QTLs and write down the selection
    write_matrix(qtls_file, qtls)

    # Write down the QTL matrix
    write_matrix(matrix_file, matrix)

    # Map matrix
    map_matrix = get_map_matrix(inputfile)
    write_matrix(map_file, map_matrix)
def convert_inputfiles(cls,
                       folder=None,
                       inputfile=None,
                       session=None,
                       lod_threshold=None,
                       qtls_file='qtls.csv',
                       matrix_file='qtls_matrix.csv',
                       map_file='map.csv'):
    """ Convert the input files present in the given folder or
    inputfile.

    This method creates the matrix representation of the QTLs results
    providing for each marker position the LOD value found for each
    trait as well as a representation of the genetic map used in the
    experiment.
    The genetic map should be cleared of any markers added by the QTL
    mapping software.

    When both ``folder`` and ``inputfile`` are given, ``inputfile``
    takes precedence.

    :kwarg folder: the path to the folder containing the files to
        check. This folder may contain sub-folders.
    :kwarg inputfile: the path to the input file to use
    :kwarg session: the session identifier used to identify which
        session to process
    :kwarg lod_threshold: the LOD threshold to apply to determine if
        a QTL is significant or not
    :kwarg qtls_file: a csv file containing the list of all the
        significant QTLs found in the analysis.
        The matrix is of type:
           trait, linkage group, position, Marker, LOD other columns
    :kwarg matrix_file: a csv file containing a matrix representation
        of the QTL data. This matrix is of type:
           marker, linkage group, position, trait1 lod, trait2, lod
    :kwarg map_file: a csv file containing the genetic map used
        in this experiment. The map is of structure:
           marker, linkage group, position
    :raises MQ2Exception: if neither ``folder`` nor ``inputfile`` is
        given, if one of them is of the wrong kind, if no input file
        is found or if ``lod_threshold`` is not a number.
    :raises MQ2NoSessionException: if ``session`` is None.
    :raises MQ2NoSuchSessionException: if ``session`` is not one of the
        identifiers returned by ``cls.get_session_identifiers``.

    """
    if folder is None and inputfile is None:
        raise MQ2Exception('You must specify either a folder or an '
                           'input file')

    sessions = cls.get_session_identifiers(folder)
    if session is None:
        raise MQ2NoSessionException(
            'The MapQTL plugin requires a session identifier to '
            'identify the session to process. '
            'Sessions are: %s' % ','.join(sessions))
    elif str(session) not in sessions:
        raise MQ2NoSuchSessionException(
            'The MapQTL session provided (%s) could not be found in the '
            'dataset. \n'
            'Sessions are: %s' % (session, ','.join(sessions)))

    if folder is not None:
        if not os.path.isdir(folder):  # pragma: no cover
            raise MQ2Exception('The specified folder is actually '
                               'not a folder')
        inputfiles = cls.get_files(folder, session_id=session)

    if inputfile is not None:  # pragma: no cover
        if os.path.isdir(inputfile):
            raise MQ2Exception('The specified input file is actually '
                               'a folder')
        inputfiles = [inputfile]

    if not inputfiles:  # pragma: no cover
        # Without this guard the code below fails on an unbound `headers`.
        raise MQ2Exception('No files correspond to this plugin')

    try:
        lod_threshold = float(lod_threshold)
    except (TypeError, ValueError):
        # float(None) raises TypeError, and None is the default value.
        raise MQ2Exception('LOD threshold should be a number')

    inputfiles.sort()

    # QTL matrix and QTL files
    qtl_matrix = []
    qtls = []
    for filename in inputfiles:
        matrix = read_input_file(filename)
        headers = matrix[0]
        qtl_matrix = get_qtls_matrix(qtl_matrix, matrix, filename)
        qtls.extend(
            get_qtls_from_mapqtl_data(matrix, lod_threshold, filename))

    # format QTLs and write down the selection
    # (the header row used is the one of the last file read)
    headers[0] = 'Trait name'
    qtls.insert(0, headers)
    write_matrix(qtls_file, qtls)

    # Write down the QTL matrix
    del qtl_matrix[0]
    # Reorganize a couple of columns: move the entry at index 2 first
    qtl_matrix.insert(0, qtl_matrix[2])
    del qtl_matrix[3]
    # write output, transposed
    qtl_matrix = list(zip(*qtl_matrix))
    write_matrix(matrix_file, qtl_matrix)

    # Map matrix
    map_matrix = get_map_matrix(inputfiles[0])
    write_matrix(map_file, map_matrix)