def get_plugin_and_folder(inputzip=None, inputdir=None, inputfile=None):
    """ Select the plugin able to process the given input.

    Exactly one of ``inputzip``, ``inputdir`` or ``inputfile`` must be
    provided. A zip file is extracted into the folder returned by
    ``set_tmp_folder``; a directory or an input file is used as is.

    The plugins loaded from ``MQ2.plugins`` are then narrowed down to the
    ones that are applicable and that accept the input (``valid_file`` for
    an input file, ``get_files`` for a folder).

    :kwarg inputzip: the zip file to use as input.
    :kwarg inputdir: the directory to use as input.
    :kwarg inputfile: the input file to use as input.
    :return: a tuple ``(plugin, tmp_folder)`` where ``tmp_folder`` is the
        extraction folder, the input file or the input directory.
    :raises MQ2Exception: if not exactly one input is provided, if several
        plugins accept the input, or if no plugin accepts it.

    """
    # Exactly one input source is allowed: zero or several is an error.
    provided = [item for item in (inputzip, inputdir, inputfile) if item]
    if len(provided) != 1:
        raise MQ2Exception('You must provide either a zip file or a '
                           'directory or an input file as input.')

    # retrieve input: file, directory, zip
    if inputzip:
        tmp_folder = set_tmp_folder()
        extract_zip(inputzip, tmp_folder)
    elif inputfile:
        tmp_folder = inputfile
    else:
        tmp_folder = inputdir

    # retrieve the plugins
    plugins = load('MQ2.plugins', subclasses=PluginInterface)
    LOG.debug('Plugin loaded: %s' % [plugin.name for plugin in plugins])

    # keep only the plugins that will work
    plugins = [plugin for plugin in plugins if plugin.is_applicable()]
    LOG.debug('Plugin applicable: %s' % [plugin.name for plugin in plugins])

    # keep only the plugins that have the file(s) they need
    if inputfile:
        plugins = [
            plugin for plugin in plugins
            if plugin.valid_file(tmp_folder)
        ]
    else:
        plugins = [
            plugin for plugin in plugins
            if plugin.get_files(tmp_folder)
        ]
    LOG.debug('Plugin w/ valid input: %s' %
              [plugin.name for plugin in plugins])

    if len(plugins) > 1:
        raise MQ2Exception('Your dataset contains valid input for '
                           'several plugins.')
    if not plugins:
        # The message used to read "cannot not be processed", which
        # stated the opposite of what happened.
        raise MQ2Exception('Invalid dataset: your input cannot be '
                           'processed by any of the current plugins.')

    return (plugins[0], tmp_folder)
def get_qtls_matrix(qtl_matrix, matrix, inputfile):
    """ Extract for each position the LOD value obtained and save it in a
    matrix.

    This assumes that the first 4 columns are identical across all mqo
    files (ie: the Group, Position and Locus are the same).
    This assumption should hold true if the files were generated from the
    same map.

    The matrix is handled column-wise: the first four columns of the first
    file processed are kept as reference, and the fifth column (LOD) of
    every file is appended with its header replaced by the trait name
    extracted from the file name (the text between ``)_`` and ``.mqo``).

    :arg qtl_matrix: the matrix (list of columns) in which to save the
        output. When it is not empty it is extended in place.
    :arg matrix: the MapQTL file read in memory, as a list of rows.
    :arg inputfile: name of the inputfile in which the QTLs have been
        found.
    :return: the list of columns, extended with the LOD column of this
        file.
    :raises MQ2Exception: if the file has no fifth column or if that
        column is not named ``LOD``.
    :raises MQ2NoMatrixException: if the first four columns differ from
        the ones already stored in ``qtl_matrix``.

    """
    trait_name = inputfile.split(')_', 1)[1].split('.mqo')[0]

    # Transpose the rows into columns.
    columns = list(zip(*matrix))

    # An empty file or a file with fewer than five columns has no LOD
    # column either: report it like any other unsupported file instead of
    # failing with an IndexError.
    if len(columns) < 5 or columns[4][0] != 'LOD':
        raise MQ2Exception(
            'The file "%s" is not supported by MQ2. It may contain an '
            'analysis which does not return LOD values '
            '(such as Kruskal-Wallis or permutation test).' % inputfile)

    if not qtl_matrix:
        qtl_matrix = columns[:4]
    elif columns[:4] != qtl_matrix[:4]:
        raise MQ2NoMatrixException(
            'The map used in the file "%s" does not'
            ' correspond to the map used in at least one other file.'
            % inputfile)

    # Rename the LOD column after the trait it belongs to.
    lod_column = list(columns[4])
    lod_column[0] = trait_name
    qtl_matrix.append(lod_column)
    return qtl_matrix
def _append_count_to_matrix(qtl_matrixfile, lod_threshold):
    """ Append a ``# QTLs`` column at the end of the matrix file.

    The file is read as comma-separated values. The first row receives
    the extra header ``# QTLs``; every other row receives the number of
    its cells, from the fourth column onward, that are not empty and whose
    value is strictly greater than the threshold. The result is written
    back to the same file.

    :arg qtl_matrixfile: the csv file containing the QTL matrix, updated
        in place.
    :arg lod_threshold: threshold used to determine if a given LOD value
        is reflective of the presence of a QTL.
    :raises MQ2Exception: if ``qtl_matrixfile`` does not exist.

    """
    if not os.path.exists(qtl_matrixfile):  # pragma: no cover
        raise MQ2Exception('File not found: "%s"' % qtl_matrixfile)

    matrix = read_input_file(qtl_matrixfile, sep=',')

    header = list(matrix[0])
    header.append('# QTLs')
    matrix[0] = header

    # Rows are replaced by index so the matrix read from disk is the very
    # object that gets written back.
    for index in range(1, len(matrix)):
        cells = list(matrix[index])
        hits = sum(
            1 for cel in cells[3:]
            if cel and float(cel) > float(lod_threshold)
        )
        cells.append(str(hits))
        matrix[index] = cells

    write_matrix(qtl_matrixfile, matrix)
def get_session_identifiers(cls, folder=None, inputfile=None):
    """ Retrieve the list of session identifiers contained in the
    data on the folder or the inputfile.
    For this plugin, it returns the list of excel sheet available.

    Only files whose name ends with one of ``SUPPORTED_FILES`` are opened.
    Sheet names are returned in the order they are first met, without
    duplicates.

    :kwarg folder: the path to the folder containing the files to
        check. This folder may contain sub-folders.
    :kwarg inputfile: the path to the input file to use
    :return: the list of sheet names found; an empty list if ``folder``
        is not a directory, if ``inputfile`` is a directory or if neither
        is provided.
    :raises MQ2Exception: if both ``folder`` and ``inputfile`` are
        provided.

    """
    if inputfile and folder:
        raise MQ2Exception('You should specify either a folder or a file')

    # Build the list of files to inspect once, so the workbook handling
    # below is shared by the folder and the single-file cases.
    candidates = []
    if folder:
        if not os.path.isdir(folder):
            return []
        for root, _dirs, files in os.walk(folder):
            for filename in files:
                candidates.append(os.path.join(root, filename))
    elif inputfile:
        if os.path.isdir(inputfile):
            return []
        candidates.append(inputfile)

    sessions = []
    if not candidates:
        return sessions

    # str.endswith accepts a tuple: each workbook is opened at most once
    # even if several extensions match.
    extensions = tuple(SUPPORTED_FILES)
    for path in candidates:
        if not path.endswith(extensions):
            continue
        wbook = xlrd.open_workbook(path)
        for sheet in wbook.sheets():
            if sheet.name not in sessions:
                sessions.append(sheet.name)
    return sessions
def mq2_run(session_id, plugin, folder, lod_threshold, session):
    """ Run the scripts to extract the QTLs.

    If ``experiment_done`` reports that this combination was already
    processed, its result is returned and nothing is run. Otherwise a new
    experiment folder is created under the upload folder of the session
    and ``run_mq2`` writes its output there. If the run fails with a
    ``MQ2Exception`` the experiment folder is removed before the error is
    raised again.

    :arg session_id: the session identifier uniquely identifying the
        MapQTL zip file and the JoinMap map file. The session identifier
        also uniquely identifies the folder in which are the files
        uploaded.
    :arg plugin: the plugin passed on to ``run_mq2``.
    :arg folder: the input folder passed on to ``run_mq2``.
    :arg lod_threshold: the LOD threshold to use to consider a value
        significant for a QTL.
    :arg session: the MapQTL session/run from which to retrieve the QTLs.
    :return: the value returned by ``experiment_done`` when it is not
        ``False``; nothing otherwise.
    :raises MQ2Exception: if the run or the reading of the matrix
        dimensions fails.

    """
    upload_folder = os.path.join(UPLOAD_FOLDER, session_id)

    already_done = experiment_done(session_id, lod_threshold, session)
    if already_done is not False:
        return already_done

    exp_id = '%s_s%s_t%s' % (generate_exp_id(), session, lod_threshold)
    exp_folder = os.path.join(upload_folder, exp_id)
    if not os.path.exists(exp_folder):
        os.mkdir(exp_folder)

    try:
        run_mq2(
            plugin, folder, lod_threshold=lod_threshold, session=session,
            outputfolder=exp_folder)
        # NOTE(review): the dimensions are not used further in this block;
        # the call is kept because it can raise MQ2Exception - confirm
        # whether the values are still needed.
        (nline, ncol) = get_matrix_dimensions(
            os.path.join(exp_folder, 'qtls_matrix.csv'))
    except MQ2Exception as err:
        # "except X as err" is valid on Python 2.6+ and Python 3, whereas
        # the former "except X, err" form is a syntax error on Python 3.
        # Do not leave a half-filled experiment folder behind.
        shutil.rmtree(exp_folder)
        raise MQ2Exception(err)
def read_excel_file(inputfile, sheet_name):
    """ Return a matrix containing all the information present in the
    excel sheet of the specified excel document.

    The matrix is a list of rows, each row being the list of the cell
    values of that row.

    :arg inputfile: excel document to read
    :arg sheet_name: the name of the excel sheet to return
    :raises MQ2Exception: if the document has no sheet with this name.

    """
    workbook = xlrd.open_workbook(inputfile)
    matching = [
        sheet for sheet in workbook.sheets() if sheet.name == sheet_name
    ]
    if not matching:  # pragma: no cover
        raise MQ2Exception('Invalid session identifier provided')

    rows = []
    for sheet in matching:
        for row in range(sheet.nrows):
            rows.append(
                [sheet.cell(row, col).value for col in range(sheet.ncols)])
    return rows
def convert_inputfiles(cls, folder=None, inputfile=None, session=None,
                       lod_threshold=None,
                       qtls_file='qtls.csv',
                       matrix_file='qtls_matrix.csv',
                       map_file='map.csv'):
    """ Convert the input files present in the given folder or
    inputfile.
    This method creates the matrix representation of the QTLs
    results providing for each marker position the LOD value found
    for each trait as well as a representation of the genetic map
    used in the experiment.
    The genetic map should be cleared of any markers added by the QTL
    mapping software.

    :kwarg folder: the path to the folder containing the files to
        check. This folder may contain sub-folders.
    :kwarg inputfile: the path to the input file to use. When both a
        folder and an input file are given, the input file is the one
        processed.
    :kwarg session: the session identifier used to identify which
        session to process
    :kwarg lod_threshold: the LOD threshold to apply to determine if
        a QTL is significant or not
    :kwarg qtls_file: a csv file containing the list of all the
        significant QTLs found in the analysis.
        The matrix is of type:
           trait, linkage group, position, Marker, LOD other columns
    :kwarg matrix_file: a csv file containing a matrix representation
        of the QTL data. This matrix is of type:
           marker, linkage group, position, trait1 lod, trait2, lod
    :kwarg map_file: a csv file containing the genetic map used
        in this experiment. The map is of structure:
           marker, linkage group, position
    :raises MQ2NoSessionException: if no session is provided.
    :raises MQ2NoSuchSessionException: if the session is not one of the
        sessions found in the folder.
    :raises MQ2Exception: if neither a folder nor an input file is given,
        if the folder is not a folder, if the input file is a folder, if
        there is no file to process or if the LOD threshold is not a
        number.

    """
    if folder is None and inputfile is None:
        raise MQ2Exception('You must specify either a folder or an '
                           'input file')

    # NOTE(review): the sessions are looked up from the folder only, also
    # when just an input file is given - confirm this is intended.
    sessions = cls.get_session_identifiers(folder)
    if session is None:
        raise MQ2NoSessionException(
            'The MapQTL plugin requires a session identifier to '
            'identify the session to process.'
            'Sessions are: %s' % ','.join(sessions))
    elif str(session) not in sessions:
        raise MQ2NoSuchSessionException(
            'The MapQTL session provided (%s) could not be found in the '
            'dataset. \n'
            'Sessions are: %s' % (session, ','.join(sessions)))

    if folder is not None:
        if not os.path.isdir(folder):  # pragma: no cover
            raise MQ2Exception('The specified folder is actually '
                               'not a folder')
        else:
            inputfiles = cls.get_files(folder, session_id=session)

    if inputfile is not None:  # pragma: no cover
        if os.path.isdir(inputfile):
            raise MQ2Exception('The specified input file is actually '
                               'a folder')
        else:
            inputfiles = [inputfile]

    # Without any file the code below would fail on an unbound variable
    # or an index error; report it explicitly instead.
    if not inputfiles:  # pragma: no cover
        raise MQ2Exception('No files correspond to this plugin')

    try:
        lod_threshold = float(lod_threshold)
    except (TypeError, ValueError):
        # TypeError covers the default value None, ValueError covers
        # strings that are not numbers.
        raise MQ2Exception('LOD threshold should be a number')

    inputfiles.sort()

    # QTL matrix and QTL files
    qtl_matrix = []
    qtls = []
    for filename in inputfiles:
        matrix = read_input_file(filename)
        headers = matrix[0]
        qtl_matrix = get_qtls_matrix(qtl_matrix, matrix, filename)
        qtls.extend(
            get_qtls_from_mapqtl_data(matrix, lod_threshold, filename))

    # format QTLs and write down the selection
    # (the header row of the last file read is reused for the QTLs file)
    headers[0] = 'Trait name'
    qtls.insert(0, headers)
    write_matrix(qtls_file, qtls)

    # Write down the QTL matrix
    # qtl_matrix is a list of columns: drop the first one...
    del qtl_matrix[0]
    # ...then move what is now the third column to the front
    qtl_matrix.insert(0, qtl_matrix[2])
    del qtl_matrix[3]
    # write output, transposed back into rows
    qtl_matrix = list(zip(*qtl_matrix))
    write_matrix(matrix_file, qtl_matrix)

    # Map matrix
    map_matrix = get_map_matrix(inputfiles[0])
    write_matrix(map_file, map_matrix)
def convert_inputfiles(cls, folder=None, inputfile=None, session=None,
                       lod_threshold=None,
                       qtls_file='qtls.csv',
                       matrix_file='qtls_matrix.csv',
                       map_file='map.csv'):
    """ Convert the input files present in the given folder or
    inputfile.
    This method creates the matrix representation of the QTLs
    results providing for each marker position the LOD value found
    for each trait as well as a representation of the genetic map
    used in the experiment.
    The genetic map should be cleared of any markers added by the QTL
    mapping software.

    This plugin processes exactly one input file.

    :kwarg folder: the path to the folder containing the files to
        check. This folder may contain sub-folders.
    :kwarg inputfile: the path to the input file to use. When both a
        folder and an input file are given, the input file is the one
        processed.
    :kwarg session: the session identifier used to identify which
        session to process. It is not used by this implementation.
    :kwarg lod_threshold: the LOD threshold to apply to determine if
        a QTL is significant or not
    :kwarg qtls_file: a csv file containing the list of all the
        significant QTLs found in the analysis.
        The matrix is of type:
           trait, linkage group, position, Marker, LOD other columns
    :kwarg matrix_file: a csv file containing a matrix representation
        of the QTL data. This matrix is of type:
           marker, linkage group, position, trait1 lod, trait2, lod
    :kwarg map_file: a csv file containing the genetic map used
        in this experiment. The map is of structure:
           marker, linkage group, position
    :raises MQ2Exception: if neither a folder nor an input file is given,
        if the folder is not a folder, if the input file is a folder, if
        there is not exactly one file to process or if the LOD threshold
        is not a number.

    """
    if folder is None and inputfile is None:
        raise MQ2Exception('You must specify either a folder or an '
                           'input file')

    if folder is not None:  # pragma: no cover
        if not os.path.isdir(folder):
            raise MQ2Exception('The specified folder is actually '
                               'not a folder')
        else:
            inputfiles = cls.get_files(folder)

    if inputfile is not None:  # pragma: no cover
        if os.path.isdir(inputfile):
            raise MQ2Exception('The specified input file is actually '
                               'a folder')
        else:
            inputfiles = [inputfile]

    if len(inputfiles) == 0:  # pragma: no cover
        raise MQ2Exception('No files correspond to this plugin')

    if len(inputfiles) > 1:  # pragma: no cover
        raise MQ2Exception(
            'This plugin can only process one file at a time')

    try:
        lod_threshold = float(lod_threshold)
    except (TypeError, ValueError):
        # TypeError covers the default value None, ValueError covers
        # strings that are not numbers.
        raise MQ2Exception('LOD threshold should be a number')

    inputfile = inputfiles[0]

    # QTL matrix and QTL files
    qtls = []
    matrix = read_input_file(inputfile, sep=',', noquote=True)
    qtls.extend(get_qtls_from_rqtl_data(matrix, lod_threshold))

    # format QTLs and write down the selection
    write_matrix(qtls_file, qtls)

    # Write down the QTL matrix (the input is written out as read)
    write_matrix(matrix_file, matrix)

    # Map matrix
    map_matrix = get_map_matrix(inputfile)
    write_matrix(map_file, map_matrix)