Exemple #1
0
def merge_rearrangement(out_filename, in_filenames, drop=False, debug=False):
    """
    Merge one or more AIRR rearrangements files

    The output columns keep the order in which each field first appears when
    walking the input files in the order given. Records are copied to the
    output file one input file at a time.

    Arguments:
      out_filename (str): output file path.
      in_filenames (list): list of input files to merge.
      drop (bool): drop flag. If True then drop fields that do not exist in all input
                   files, otherwise combine fields from all input files.
      debug (bool): debug flag. If True print debugging information to standard error.

    Returns:
      bool: True if files were successfully merged, otherwise False. False is
            also returned when no input files are given.
    """
    # Track every input handle opened here so that all of them are released
    # even when a later open, a reader construction or the merge itself fails.
    handles = []
    try:
        for filename in in_filenames:
            handles.append(open(filename, 'r'))
        if not handles:
            # Nothing to merge; report it through the common failure path
            # instead of relying on set.union() failing with no arguments.
            raise ValueError('No input files to merge.')

        # gather fields from input files
        readers = [RearrangementReader(h, debug=debug) for h in handles]
        field_list = [x.fields for x in readers]
        if drop:
            field_set = set.intersection(*map(set, field_list))
        else:
            field_set = set.union(*map(set, field_list))
        # An ordered mapping de-duplicates field names while preserving the
        # order of first appearance across the input files.
        field_order = OrderedDict(
            (f, None) for f in chain.from_iterable(field_list))
        out_fields = [f for f in field_order if f in field_set]

        # write input files to output file sequentially
        with open(out_filename, 'w+') as handle:
            writer = RearrangementWriter(handle,
                                         fields=out_fields,
                                         debug=debug)
            for reader in readers:
                for r in reader:
                    writer.write(r)
                reader.close()
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        sys.stderr.write(
            'Error occurred while merging AIRR rearrangement files.\n')
        return False
    finally:
        # Closing an already closed file object is a no-op, so this is safe
        # for handles that reader.close() may have closed on the success path.
        for h in handles:
            h.close()

    return True
Exemple #2
0
def dump_rearrangement(dataframe, filename, debug=False):
    """
    Serialize a data frame of rearrangements to an AIRR rearrangements file

    Every column of the data frame becomes an output field, and each row is
    converted to a dictionary and passed to a RearrangementWriter in turn.

    Arguments:
      dataframe (pandas.DataFrame): data frame of rearrangement data.
      filename (str): output file path.
      debug (bool): debug flag. If True print debugging information to standard error.

    Returns:
      bool: True once every row has been written. Errors are not caught here,
            so a failure surfaces as an exception rather than a False return.
    """
    # TODO: test pandas.DataFrame.to_csv with converters argument as an alternative
    # dataframe.to_csv(handle, sep='\t', header=True, index=False, encoding='utf-8')

    column_names = dataframe.columns.tolist()
    with open(filename, 'w+') as out_handle:
        airr_writer = RearrangementWriter(out_handle,
                                          fields=column_names,
                                          debug=debug)
        # The row index label is irrelevant to the output; only the row
        # content is converted and written.
        records = (series.to_dict() for _, series in dataframe.iterrows())
        for record in records:
            airr_writer.write(record)

    return True