pickles_dir)

    # Use GENCODE datasource to get list of all possible genes
    gencode_ds_loc = expanduser(args.gencode_ds)
    gencode_ds = DatasourceFactory.createDatasource(
        configFilename=gencode_ds_loc, leafDir=os.path.dirname(gencode_ds_loc))

    # Use the simple_uniprot TSV to get the uniprot_entry_names.
    # Create the transcript-to-UniProt info mappings in a way that takes less RAM: given a gene, get the UniProt record.
    uniprotDS = GenericTranscriptDatasource(src_file=uniprot_tsv,
                                            title="UniProt",
                                            version="2014_12",
                                            geneColumnName="gene")

    # key is the uniprot_entry_name from the uniprotDS
    muts = generateTranscriptMuts(gencode_ds, uniprotDS)

    swissKeys = swiss_data.keys()
    tremblKeys = trembl_data.keys()

    featureTypeToAnnotation = {
        "SITE": "site",
        "VARIANT": "natural_variation",
        "COMPBIAS": "region",
        "REGION": "region",
        "DOMAIN": "region",
        "CONFLICT": "experimental_info"
    }
    featureTypes = featureTypeToAnnotation.keys()
    ctr = 0
    numTranscriptsNotInUniprot = 0
    # TODO: Remove hardcoded paths
    # TODO: Reduce code duplication

    swiss_data = parseWithShove(uniprot_swiss_fname, parse_uniprot_data, "/bulk/pickles/")
    trembl_data = parseWithShove(uniprot_trembl_fname, parse_uniprot_data, "/bulk/pickles/")

    # Use GAF datasource to get list of all possible genes
    gafDS = Gaf(gaf_file, gaf_transcript_file)

    # Use the simple_uniprot TSV to get the uniprot_entry_names.
    # Create the gene-to-UniProt info mappings in a way that takes less RAM: given a gene, get the UniProt record.
    uniprotDS = Generic_Gene_DataSource(src_file=uniprot_tsv, title="UniProt", version="2011_09", geneColumnName="gene")

    # key is the uniprot_entry_name from the uniprotDS
    muts = generateTranscriptMuts(gafDS, uniprotDS)

    swissKeys = swiss_data.keys()
    tremblKeys = trembl_data.keys()

    featureTypeToAnnotation = {"SITE":"site", "VARIANT":"natural_variation", "COMPBIAS":"region" , "REGION":"region", "DOMAIN":"region", "CONFLICT":"experimental_info"}
    featureTypes = featureTypeToAnnotation.keys()
    ctr = 0
    numTranscriptsNotInUniprot = 0
    uniprotEntryNameKey = 'UniProt_uniprot_entry_name'
    for m in muts:
        ctr += 1
        if (ctr % 1000) == 0:
            print(str(ctr))

        if m[uniprotEntryNameKey] in swissKeys:
    # TODO: Reduce code duplication

    swiss_data = parse_with_shove(uniprot_swiss_fname, parse_uniprot_data, pickles_dir)
    trembl_data = parse_with_shove(uniprot_trembl_fname, parse_uniprot_data, pickles_dir)

    # Use GENCODE datasource to get list of all possible genes
    gencode_ds_loc = expanduser(args.gencode_ds)
    gencode_ds = DatasourceFactory.createDatasource(configFilename=gencode_ds_loc, leafDir=os.path.dirname(gencode_ds_loc))

    # Use the simple_uniprot TSV to get the uniprot_entry_names.
    # Create the transcript-to-UniProt info mappings in a way that takes less RAM: given a gene, get the UniProt record.
    uniprotDS = GenericTranscriptDatasource(src_file=uniprot_tsv, title="UniProt", version="2014_12", geneColumnName="gene")

    # key is the uniprot_entry_name from the uniprotDS
    muts = generateTranscriptMuts(gencode_ds, uniprotDS)

    swissKeys = swiss_data.keys()
    tremblKeys = trembl_data.keys()

    featureTypeToAnnotation = {"SITE":"site", "VARIANT":"natural_variation", "COMPBIAS":"region" , "REGION":"region", "DOMAIN":"region", "CONFLICT":"experimental_info"}
    featureTypes = featureTypeToAnnotation.keys()
    ctr = 0
    numTranscriptsNotInUniprot = 0
    uniprotEntryNameKey = 'UniProt_uniprot_entry_name'
    txs_already_processed = set()
    records_already_processed = set()
    num_txs_with_same_data = 0
    for m in muts:
        if m['transcript_id'] in txs_already_processed:
            continue