def get_geneids_from_affy(affy_id_list, affy_file=None):
    """
    Parses an affymetrix annotation csv file and returns a dictionary
    mapping affy probe ids to the tuple (genebank,unigene,symbol).

    Lines starting with '#' and blank lines are ignored. The first
    remaining line is taken to be the column header row and is skipped.
    For every following row, field 0 is used as the probe id and fields
    8, 10 and 14 are used as genebank, unigene and symbol respectively.

    @param affy_id_list: A list of strings like '1000_at'...
    NOTE: this argument is currently not consulted. The returned
    dictionary holds an entry for every data row of the csv file, so
    callers are expected to look up the probe ids they need themselves.
    @param affy_file: If none, then the function get_affy_key_file()
    will be called to get the full file name and path to the csv file,
    else specify the filename/path.
    @return: A dictionary { affy_id : (genebank, unigene, symbol) }. If
    the same probe id occurs on several rows, the last row wins.
    """
    if affy_file is None:
        affy_file = get_affy_key_file()

    # Keep only meaningful lines. Blank lines (e.g. a trailing empty line)
    # are dropped here because they would otherwise reach the field
    # indexing below and fail, or be mistaken for the header row.
    with open(affy_file, "r") as f:
        lines = [
            raw for raw in f
            if raw.strip() and not raw.startswith("#")
        ]

    affy_dict = {}
    total = len(lines)  # loop invariant, computed once for progress output
    for i, ln in enumerate(lines[1:]):  # lines[0] is the column headers
        ifr.print_progress(i, total)
        fields = ifr.smart_split(ln, sep=",")
        affy_dict[fields[0]] = (fields[8], fields[10], fields[14])

    return affy_dict
def gen_affy_to_geneId_dict(affy_file_subdir="HG_U95A.na33.annot", affy_fn="HG_U95A.na33.annot.csv"):
    """
    Converts the affymetrix probe set ids of an annotation file into gene
    lists with names suitable for querying gather or kegg. Generates
    a dictionary with entries { affy_id : gene_id_list }. Most times, gene_id_list will
    have only a single entry, but several probes have multiple Gene IDs given.

    Lines starting with '#' and blank lines are ignored, and the first
    remaining line is skipped as the column header row. Field 0 of each
    row is the probe id and field 14 is read as the gene symbol field:
    a value of "---" yields an empty list, and a value containing "///"
    is split on that separator into several (stripped, non-empty) entries.

    @param affy_file_subdir: The subdirectory of the ifr.DATA_DIR that has
    the HG_U95A.na33.annot.csv file.
    @param affy_fn: The csv file in the subdirectory with the data. The parameter is
    provided in case the file was renamed from the original name of "HG_U95A.na33.annot.csv"
    @return: A dictionary { affy_id : gene_id_list }. Each list is free of
    duplicates and keeps the order in which the symbols appear in the file.
    @note: Relies on a data file called HG_U95A.na33.annot.csv that must be present
    in the HG_U95A.na33.annot subdirectory of the linked Data directory. It would
    be most efficient to use this function once and save the resulting dictionary
    in a pickle file for later use instead of having to re-parse the data.
    """
    affy_file = os.path.join(ifr.DATA_DIR, affy_file_subdir, affy_fn)

    # Keep only meaningful lines. Blank lines (e.g. a trailing empty line)
    # are dropped here because they would otherwise reach the field
    # indexing below and fail, or be mistaken for the header row.
    with open(affy_file, "r") as f:
        lines = [
            raw for raw in f
            if raw.strip() and not raw.startswith("#")
        ]

    affy_dict = {}
    total = len(lines)  # loop invariant, computed once for progress output
    for i, ln in enumerate(lines[1:]):  # lines[0] is the column headers
        ifr.print_progress(i, total)
        fields = ifr.smart_split(ln, sep=",")
        key = fields[0]
        val = fields[14]

        if val == "---":
            # this affy id has no gene symbol
            genelist = []
        elif "///" in val:
            # there are more than one GeneIds for this probe
            stripped = (part.strip() for part in val.split("///"))
            genelist = [part for part in stripped if part != ""]
        else:
            genelist = [val]

        # Remove duplicates while keeping the order from the file, so the
        # result is identical from run to run (a plain set would hand the
        # symbols back in arbitrary order).
        affy_dict[key] = list(dict.fromkeys(genelist))

    return affy_dict