# Search SRA, keep only the packages that pass the read filters, and write
# the filtered table plus the list of unique lineages to disk.
# NOTE(review): `query`, `email_bruno` and `basename` are expected to be
# defined before this point -- they are not visible in this chunk.

# Value handed to SRASearch as its `retmax` argument.
retmax = 5000

# Instantiate search object.
sra_search = SRASearch(query=query, retmax=retmax, email=email_bruno)

# Execute search itself.
sra_search.esearch()

# Fetch metadata from packages.
packages = [SRAPackage(sra_id) for sra_id in sra_search.idlist]

# Store packages in data frame for filtering.
package_filter = FilterPackages(packages)

# Working data frame (this binds a reference; no explicit copy is made here).
df = package_filter.data_frame

# Filter booleans. The three conditions are combined into a single mask:
# chaining df[m1][m2][m3] applies masks built on the full frame to already
# filtered frames, which makes pandas reindex the mask and emit a warning.
keep_rows = (
    (df.library_layout == 'PAIRED')
    & (df.nreads > 1)
    & (df.read_average >= 70)
)
filtered_df = df[keep_rows]

# Sort data by lineage. DataFrame.sort() was removed from pandas;
# sort_values() is the supported equivalent.
sorted_df = filtered_df.sort_values('lineage')

# Write CSV out.
package_filter.filtered_data_frame = sorted_df
package_filter.write_csv(basename + '.csv')

# Write unique list of taxa, one per line.
unique = package_filter.filtered_data_frame.lineage.unique()
unique.tofile('unique_' + basename + '.txt', sep='\n')
# Search SRA, keep only the packages that pass the read filters, and write
# the filtered table plus the list of unique lineages to disk.
# NOTE(review): `query`, `email_bruno` and `basename` are expected to be
# defined before this point -- they are not visible in this chunk.

# Value handed to SRASearch as its `retmax` argument.
retmax = 6000

# Instantiate search object.
sra_search = SRASearch(query=query, retmax=retmax, email=email_bruno)

# Execute search itself.
sra_search.esearch()

# Fetch metadata from packages.
packages = [SRAPackage(sra_id) for sra_id in sra_search.idlist]

# Store packages in data frame for filtering.
package_filter = FilterPackages(packages)

# Working data frame (this binds a reference; no explicit copy is made here).
df = package_filter.data_frame

# Filter booleans. The three conditions are combined into a single mask:
# chaining df[m1][m2][m3] applies masks built on the full frame to already
# filtered frames, which makes pandas reindex the mask and emit a warning.
keep_rows = (
    (df.library_layout == 'PAIRED')
    & (df.nreads > 1)
    & (df.read_average >= 70)
)
filtered_df = df[keep_rows]

# Sort data by lineage. DataFrame.sort() was removed from pandas;
# sort_values() is the supported equivalent.
sorted_df = filtered_df.sort_values('lineage')

# Write CSV out.
# NOTE(review): basename is passed without a '.csv' suffix -- presumably
# write_csv() appends the extension itself; confirm against FilterPackages.
package_filter.filtered_data_frame = sorted_df
package_filter.write_csv(basename)

# Write unique list of taxa, one per line.
unique = package_filter.filtered_data_frame.lineage.unique()
unique.tofile(basename + '_unique' + '.txt', sep='\n')