コード例 #1
0
ファイル: Los.py プロジェクト: andrewshir/CollIntel
def get_data(triplet):
    """Return the RLOS values of all rows matching a (sex, age, sline) triplet.

    ``triplet`` is one three-item sequence ``(sex, age, sline)``, passed
    positionally exactly as with the former tuple-unpacking signature (that
    syntax was removed by PEP 3113, so it is unpacked in the body instead).

    Rows come from the module-level ``all_data``.  A row is skipped when its
    service line (column 14) is None, when its admit date (column 2), RLOS
    (column 5) or ``soif`` value (column 8) is empty, when its
    (sex, age category, service line) differs from ``triplet``, or when
    ``int(soif)`` exceeds 2.

    Returns a list with ``int(rlos)`` for every remaining row.
    """
    sex, age, sline = triplet
    wanted = (sex, age, sline)
    result = []
    for row in all_data:
        admit_date = row[2]
        agef = fp.split_age(int(row[9]))
        sexf = int(row[10])
        slinef = row[14]
        soif = row[8]
        rlos = row[5]

        if slinef is None:
            continue
        if len(admit_date) == 0 or len(rlos) == 0 or len(soif) == 0:
            continue
        if wanted != (sexf, agef, slinef):
            continue
        if int(soif) > 2:
            continue

        # The parsed value is not needed here; the call is retained so a row
        # whose date parse_datetime rejects behaves as before.
        # NOTE(review): drop the call if parse_datetime has no validating effect.
        fp.parse_datetime(admit_date)

        result.append(int(rlos))
    return result
コード例 #2
0
 def print_freq(data):
     freq = {}
     length = float(len(data))
     for x in data:
         xcat = fp.split_age(x)
         freq.setdefault(xcat, 0)
         freq[xcat] += 1
     for x in sorted(freq.keys()):
         print "%d: %.2f" % (x, round(freq[x]/length, 2)),
     print
コード例 #3
0
def train_rlos(data, show_chart=False):
    """Train LOS estimator"""
    """Train patient LOS for triplet (sex, age, sline)"""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        rlos = int(row["rlos"])

        if rlos == 0:
            print "RLOS equals zero for sex %d, age %d, SL %s" % (sex, age, sline)

        tuple = (sex, age, sline)
        freq.setdefault(tuple, [])
        freq[tuple].append(rlos)

    result = {}
    for tuple, train_data in freq.items():
        (sex, age, sline) = tuple
        if len(train_data) < training_threshold:
            print "Too small training set (<%d) for sex %d, age %d, SL %s. Data will be skipped. " % \
                  (training_threshold, sex, age, sline)
            continue

        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [round(l[0]) for l in kde.sample(size).tolist()]
        result[tuple] = kde

        if show_chart:
            # print "Sex=%d, Age=%d, SL=%s" % (sex, age, sline)
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)

            # hist for train data
            plt.subplot(211)
            plt.title("RLOS train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(train_data)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('RLOS')
            plt.hist(samples)

            plt.show()

    return result
コード例 #4
0
def train_admit_count(data, show_chart=False):
    """Train patient admittance number for triplet (sex, age, sline)"""
    freq = {}
    for row in data:
        sex = int(row["sex"])
        age = fp.split_age(int(row["age"]))
        sline = row["sline"]
        admit = row["admit"]

        tuple = (sex, age, sline)
        freq.setdefault(tuple, {})
        freq[tuple].setdefault(admit, 0)
        freq[tuple][admit] += 1

    result = {}
    for tuple, days in freq.items():
        (sex, age, sline) = tuple
        train_data = days.values()
        if len(train_data) < training_threshold:
            print "Too small training set (<%d) for sex %d, age %d, SL %s. Data will be skipped. " % \
                  (training_threshold, sex, age, sline)
            continue

        X = np.array([train_data]).transpose()
        kde = KernelDensity(kernel='tophat', bandwidth=0.5).fit(X)
        kdef = lambda size: [int(round(l[0])) for l in kde.sample(size).tolist()]
        result[tuple] = kde

        if show_chart:
            # print "Sex=%d, Age=%d, SL=%s" % (sex, age, sline)
            # print_freq(ages)
            samples = kdef(len(train_data)) if len(train_data) < 500 else kdef(500)
            # print_freq(samples)

            # hist for train data
            plt.subplot(211)
            plt.title("Admit count train data for Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(train_data)

            # estimated density
            plt.subplot(212)
            plt.title("Estimated density Sex=%d, Age=%d, SL=%s" % (sex, age, sline))
            plt.ylabel('freq')
            plt.xlabel('admittance count')
            plt.hist(samples)

            plt.show()

    return result
コード例 #5
0
def build_chart(generated_data):
    """Builds charts of freq differences between average model and historical data"""
    freqs_model = {}
    freqs_history = {}
    for row in generated_data:
        id = row[0]
        sex = row[2]
        age = row[3]
        sline = row[4]
        rlos = row[5]
        tuple = (sex, fp.split_age(age), sline)

        if id[0] == 'M':
            freqs_model.setdefault(tuple, {})
            freqs_model[tuple].setdefault(rlos, {})
            freqs_model[tuple][rlos].setdefault(id, 0)
            freqs_model[tuple][rlos][id] += 1
        else:
            freqs_history.setdefault(tuple, {})
            freqs_history[tuple].setdefault(rlos, {})
            freqs_history[tuple][rlos].setdefault(id, 0)
            freqs_history[tuple][rlos][id] += 1

    # calculate average freqs
    freqs_avg_model = {}
    freqs_avg_history = {}
    for tuple in freqs_model.keys():
        rt = {}
        for rlos in freqs_model[tuple].keys():
            d = freqs_model[tuple][rlos]
            rt[rlos] = sum(d.values()) / float(len(d))
        freqs_avg_model[tuple] = rt
    for tuple in freqs_history.keys():
        rt = {}
        for rlos in freqs_history[tuple].keys():
            d = freqs_history[tuple][rlos]
            rt[rlos] = sum(d.values()) / float(len(d))
        freqs_avg_history[tuple] = rt

    plot_data = {}
    for tuple in freqs_avg_model.keys():
        fm = freqs_avg_model[tuple]
        if tuple not in freqs_avg_history:
            print "Cannot find history data to compare with model for sex: %d, age %d, sline %s" % tuple
        fh = freqs_avg_history[tuple]
        plot_data[tuple] = calculate_distance(fm, fh)

    plt.title("Difference between average modeled and historic data")
    plt.plot(sorted(plot_data.values()), 'ro')
    plt.show()
コード例 #6
0
def predict_patient_flow(ages_estimator, admit_count_estimator, rlos_estimator, day_patients_prob,
                         model_count=1, history_count=1, sline_list=None, days=30):
    if sline_list is None:
        sline_list = []
        for common_sline in ages_estimator.keys():
            found = False
            for sex, age, sline in admit_count_estimator.keys():
                if sline == common_sline:
                    found = True
                    break
            if not found:
                continue
            found = False
            for sex, age, sline in rlos_estimator.keys():
                if sline == common_sline:
                    found = True
                    break
            if not found:
                continue
            found = False
            for sex, age, sline in day_patients_prob.keys():
                if sline == common_sline:
                    found = True
                    break
            if not found:
                continue
            sline_list.append(sline)

    # dataset indexes to make dataset identifiers
    model_index = 1
    history_index = 1

    result = []
    for sline in sline_list:
        for sex in [2, 3]:
            for age in [2, 3, 4, 5]:
                tuple = (sex, age, sline)

                if tuple not in admit_count_estimator \
                        or tuple not in rlos_estimator \
                        or tuple not in day_patients_prob:
                    print "Cannot find all estimations for sex %d, age %d, SL %s" % tuple
                    continue

                # add historic data
                for it in xrange(history_count):
                    result.extend(historic_data(tuple, days))
                    history_index += 1

                # model patient flow
                for it in xrange(model_count):
                    rlos_flow_func = lambda: [int(round(l[0]))
                                              for l in rlos_estimator[tuple].sample(100).tolist()]
                    rlos_flow = rlos_flow_func()
                    age_flow_func = lambda: [a for a in ages_estimator[sline](500) if fp.split_age(a) == age]
                    age_flow = recall_if_empty(age_flow_func)
                    admit_count_func = lambda: [int(round(l[0]))
                                                for l in admit_count_estimator[tuple].sample(100).tolist()]
                    admit_flow = admit_count_func()

                    for iday in xrange(days):
                        if day_patients_prob[tuple] == 1.0 or random.random() <= day_patients_prob[tuple]:
                            pat_count = admit_flow.pop()
                            if len(admit_flow) == 0:
                                admit_flow = admit_count_func()
                            for p in xrange(pat_count):
                                id = "M%02d (%d, %d, %s)" % (model_index, sex, age, sline)
                                result.append(
                                    (id, str(iday+1), sex, age_flow.pop(), sline, rlos_flow.pop()))
                                if len(rlos_flow) == 0:
                                    rlos_flow = rlos_flow_func()
                                if len(age_flow) == 0:
                                    age_flow = age_flow_func()

                    model_index += 1
    return result
コード例 #7
0
alert_count = 50
raw_data, missed_drg = fp.load_data_with_sline()
data = fp.change_to_dict(fp.filter_incomplete_data(raw_data))
print "Rows with following DRG were skipped:",
print missed_drg
print "Filter out %d of %d" % (len(raw_data) - len(data), len(raw_data))

def history(all_data, (sex, age, sline), days=30):
    """Return historical data for selected combination of (sex, age, sline)"""
    start_date = None
    end_date = None
    hist_data = {}
    for row in all_data:
        admit_date = row[2]
        agef_in_years = int(row[9])
        agef = fp.split_age(agef_in_years)
        sexf = int(row[10])
        slinef = row[14]
        rlos = row[5]

        if slinef is None:
            continue
        if len(admit_date) == 0:
            continue

        if len(rlos) == 0:
            continue

        if (sex, age, sline) != (sexf, agef, slinef):
            continue
        datetime = fp.parse_datetime(admit_date)