def merge_rearrangement(out_filename, in_filenames, drop=False, debug=False):
    """
    Merge one or more AIRR rearrangements files

    The output columns are the union (or, with ``drop=True``, the
    intersection) of the input columns, ordered by first appearance across
    the input files. Records are written to the output file sequentially,
    one input file after another.

    Arguments:
      out_filename (str): output file path.
      in_filenames (list): list of input files to merge.
      drop (bool): drop flag. If True then drop fields that do not exist in all input files,
                   otherwise combine fields from all input files.
      debug (bool): debug flag. If True print debugging information to standard error.

    Returns:
      bool: True if files were successfully merged, otherwise False.
            An empty ``in_filenames`` is treated as a failure.
    """
    # Every input handle is tracked here so that it is closed on all exit
    # paths, including a failure part-way through opening or merging.
    handles = []
    try:
        # gather fields from input files
        readers = []
        for path in in_filenames:
            handle = open(path, 'r')
            handles.append(handle)
            readers.append(RearrangementReader(handle, debug=debug))
        field_list = [reader.fields for reader in readers]

        # set.union / set.intersection raise TypeError when given no
        # arguments, which is what turns an empty input list into a failure.
        if drop:
            field_set = set.intersection(*map(set, field_list))
        else:
            field_set = set.union(*map(set, field_list))

        # An ordered mapping de-duplicates the field names while keeping the
        # order in which they were first seen.
        field_order = OrderedDict.fromkeys(chain.from_iterable(field_list))
        out_fields = [f for f in field_order if f in field_set]

        # write input files to output file sequentially
        with open(out_filename, 'w+') as out_handle:
            writer = RearrangementWriter(out_handle, fields=out_fields, debug=debug)
            for reader in readers:
                for record in reader:
                    writer.write(record)
                # Release each input as soon as it has been consumed.
                reader.close()
    except Exception as err:
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit still propagate to the caller.
        sys.stderr.write('Error occurred while merging AIRR rearrangement files.\n')
        if debug:
            sys.stderr.write('  %s: %s\n' % (type(err).__name__, err))
        return False
    finally:
        # Closing an already-closed file is a no-op, so this is safe after
        # the per-reader close() calls above.
        for handle in handles:
            handle.close()

    return True
def dump_rearrangement(dataframe, filename, debug=False):
    """
    Write the contents of a data frame to an AIRR rearrangements file

    The data frame's column labels are used as the output fields, and each
    row is converted to a dictionary and passed to a RearrangementWriter in
    row order.

    Arguments:
      dataframe (pandas.DataFrame): data frame of rearrangement data.
      filename (str): output file path.
      debug (bool): debug flag. If True print debugging information to standard error.

    Returns:
      bool: True if the file is written without error.
    """
    # TODO: test pandas.DataFrame.to_csv with converters argument as an alternative
    # dataframe.to_csv(handle, sep='\t', header=True, index=False, encoding='utf-8')

    # Output fields follow the data frame's column order.
    column_names = dataframe.columns.tolist()

    with open(filename, 'w+') as out_handle:
        tsv_writer = RearrangementWriter(out_handle, fields=column_names, debug=debug)
        # The index label of each row is not written, only its values.
        for _index, record in dataframe.iterrows():
            row_values = record.to_dict()
            tsv_writer.write(row_values)

    return True