Ejemplo n.º 1
0
def ReadFiles(options):
    """
    Read the ensemble files and the input csv named by ``options``.

    Each path in ``options.ensemble_paths`` is read as a comma-separated list
    of query names and stored in a lookup keyed by the file's basename. The
    distinct query names across all ensembles are then passed, together with
    the absolute input csv path, to ``csv_interface.read_csv``.

    :param options: object exposing ``ensemble_paths`` (iterable of file
        paths) and ``inputpath`` (path of the input csv); it is also handed
        through unchanged to ``csv_interface.read_csv``.
    :return: ``(molecules, ensemble_lookup)`` on success, or ``1`` if an
        ensemble file cannot be read or contains an empty entry.
    :raises SystemExit: with status 1 if ``csv_interface.read_csv`` returns a
        falsy result.
    """
    # read ensembles into ensemble lookup {'Ensemble_N' : [query_name_list]}
    ensemble_lookup = {}

    for ensemble_path in options.ensemble_paths:
        try:
            # the context manager closes the handle even if read() fails
            with open(ensemble_path, 'r') as ensemble_file:
                contents = ensemble_file.read()
        except IOError:
            print("\nUnable to open ensemble_list: {l}\n".format(
                l=ensemble_path))
            return 1

        ensemble_queries = [query.strip() for query in contents.split(',')]

        # str.split always yields at least one item, so an empty file, a
        # trailing comma or a blank entry all surface as an empty name
        if not ensemble_queries or '' in ensemble_queries:
            print("\n{l} is empty\n".format(l=ensemble_path))
            return 1

        ensemble_lookup[os.path.basename(ensemble_path)] = ensemble_queries

    # collect every distinct query name, keeping first-seen order; the set
    # guarantees a name repeated inside one ensemble is only listed once
    uniq = []
    seen = set()
    for ensemble_queries in ensemble_lookup.values():
        for query in ensemble_queries:
            if query not in seen:
                seen.add(query)
                uniq.append(query)

    # read input csv
    inputpath = os.path.abspath(options.inputpath)
    print('')
    print(" Reading input file ...")
    print('')
    molecules = csv_interface.read_csv(inputpath, options, uniq)
    if not molecules:
        # name the offending file instead of printing a raw '%s' placeholder
        print("\n '{f}' was unable to be parsed\n".format(
            f=os.path.basename(options.inputpath)))
        sys.exit(1)

    return molecules, ensemble_lookup
Ejemplo n.º 2
0
def run(itf):
    """
    Run optimize functions.

    Parses the user input with ``SplitInput``, reads the input csv, calls
    ``optimizor`` once for every ensemble size from 1 up to
    ``options.ensemble_size`` and writes the collected values to
    ``auc_histogram.csv`` and ``ef_histogram.csv`` (relative paths), one
    value per line.

    :param itf: user-input object handed to ``SplitInput``; a falsy value
        aborts the run.
    :return: ``1`` if ``itf`` is falsy, otherwise ``None``.
    :raises SystemExit: with status 1 if the input csv could not be parsed.
    """

    if not itf:
        return 1

    # access user input
    options = SplitInput(itf)

    # read input
    inputpath = os.path.abspath(options.inputpath)
    print(" Reading input file ...")
    molecules = csv_interface.read_csv(inputpath, options)
    if not molecules:
        print("\n '{flag}' was unable to be parsed\n".format(
            flag=os.path.basename(options.inputpath)))
        sys.exit(1)

    # sort order is hard-coded; the automatic detection through
    # classification.get_sort_order(molecules) is currently disabled
    sort_order = 'asc'
    ensemble_size = options.ensemble_size

    # loop over all ensemble sizes (1 .. ensemble_size), accumulating the
    # values that are later written out for histogram construction
    auc_list = []
    ef_list = []
    for size in range(1, ensemble_size + 1):
        auc, ef = optimizor(molecules, sort_order, size, options)
        auc_list += auc
        ef_list += ef

    # write auc_list & ef_list out to files for subsequent post-processing;
    # the context managers close each file even if formatting a value fails
    with open('auc_histogram.csv', 'w') as f:
        for value in auc_list:
            f.write('%f\n' % value)
    with open('ef_histogram.csv', 'w') as f:
        for value in ef_list:
            f.write('%f\n' % value)
Ejemplo n.º 3
0
def run(itf):
    """
    Run optimize functions.

    The user input is split into options, the input csv is read, and
    ``optimizor`` is invoked for each ensemble size in
    ``1 .. options.ensemble_size``. The two result lists it yields are
    accumulated and dumped, one ``%f``-formatted value per line, into
    ``auc_histogram.csv`` and ``ef_histogram.csv`` (relative paths).

    :param itf: user-input object passed to ``SplitInput``; nothing is done
        when it is falsy.
    :return: ``1`` if ``itf`` is falsy, otherwise ``None``.
    :raises SystemExit: with status 1 if the input csv could not be parsed.
    """

    if not itf:
        return 1

    # access user input
    options = SplitInput(itf)

    # read input
    inputpath = os.path.abspath(options.inputpath)
    print(" Reading input file ...")
    molecules = csv_interface.read_csv(inputpath, options)
    if not molecules:
        print("\n '{flag}' was unable to be parsed\n".format(flag=os.path.basename(options.inputpath)))
        sys.exit(1)

    # the sort order is fixed to ascending; detection via
    # classification.get_sort_order(molecules) is currently switched off
    sort_order = 'asc'

    # collect the optimizor output for every ensemble size, smallest first
    auc_list = []
    ef_list = []
    for size in range(1, options.ensemble_size + 1):
        auc, ef = optimizor(molecules, sort_order, size, options)
        auc_list += auc
        ef_list += ef

    # dump both lists for later histogram construction; `with` makes sure a
    # file is closed before the next one is opened, even when a write raises
    for filename, values in (('auc_histogram.csv', auc_list),
                             ('ef_histogram.csv', ef_list)):
        with open(filename, 'w') as f:
            for value in values:
                f.write('%f\n' % value)
Ejemplo n.º 4
0
def run(itf):
    """
    Run approximate functions.

    Parses the user input with ``SplitInput``, reads the input csv through
    ``csv_interface.read_csv`` and hands the parsed molecules to
    ``approximator``.

    :param itf: user-input object passed to ``SplitInput``; a falsy value
        aborts the run.
    :return: ``1`` if ``itf`` is falsy, otherwise ``None``.
    :raises SystemExit: with status 1 if the input csv could not be parsed.
    """

    if not itf:
        return 1

    # access user input
    options = SplitInput(itf)

    # read input
    print(" Reading input file ...")
    inputpath = os.path.abspath(options.inputpath)
    molecules = csv_interface.read_csv(inputpath, options)
    if not molecules:
        # the file name is wrapped in a balanced pair of quotes
        print("\n '{f}' was unable to be parsed\n".format(f=os.path.basename(options.inputpath)))
        sys.exit(1)

    approximator(molecules, options)
0
def ReadFiles(options):
    """
    Load the ensemble definitions and the input csv described by ``options``.

    Every file in ``options.ensemble_paths`` is expected to hold a
    comma-separated list of query names; the stripped names are stored under
    the file's basename. The distinct names found across all ensembles are
    forwarded to ``csv_interface.read_csv`` along with the absolute path of
    ``options.inputpath``.

    :param options: object exposing ``ensemble_paths`` (iterable of file
        paths) and ``inputpath`` (path of the input csv); also passed on to
        ``csv_interface.read_csv``.
    :return: ``(molecules, ensemble_lookup)`` on success, or ``1`` if an
        ensemble file cannot be read or contains an empty entry.
    :raises SystemExit: with status 1 if ``csv_interface.read_csv`` returns a
        falsy result.
    """
    # ensemble lookup {'Ensemble_N' : [query_name_list]}
    ensemble_lookup = {}

    for ensemble_path in options.ensemble_paths:
        try:
            # `with` releases the handle on every exit path
            with open(ensemble_path, 'r') as ensemble_file:
                raw_text = ensemble_file.read()
        except IOError:
            print("\nUnable to open ensemble_list: {l}\n".format(l=ensemble_path))
            return 1

        ensemble_queries = [name.strip() for name in raw_text.split(',')]

        # splitting never returns an empty list: an empty file, a trailing
        # comma or a blank field each produce an empty-string entry instead
        if not ensemble_queries or '' in ensemble_queries:
            print("\n{l} is empty\n".format(l=ensemble_path))
            return 1

        ensemble_lookup[os.path.basename(ensemble_path)] = ensemble_queries

    # gather the distinct query names in first-seen order; tracking them in a
    # set also drops names that are repeated within a single ensemble
    uniq = []
    already_listed = set()
    for ensemble_queries in ensemble_lookup.values():
        for name in ensemble_queries:
            if name in already_listed:
                continue
            already_listed.add(name)
            uniq.append(name)

    # read input csv
    inputpath = os.path.abspath(options.inputpath)
    print('')
    print(" Reading input file ...")
    print('')
    molecules = csv_interface.read_csv(inputpath, options, uniq)
    if not molecules:
        # report the file name rather than an unfilled '%s' placeholder
        print("\n '{f}' was unable to be parsed\n".format(f=os.path.basename(options.inputpath)))
        sys.exit(1)

    return molecules, ensemble_lookup