コード例 #1
0
def analyse(path, filter_fn, field_name, print_csv=False):
    data = load_data(path, filter_fn)
    occurrences = data['days']
    day_of_cycle = data['day_of_cycle']
    weekdays = data['weekdays']
    day_of_cycle_total = sum([day_of_cycle[x] for x in day_of_cycle])

    if len(occurrences) == 0:
        print "No tags found. Are you sure '%s' is the correct tag?" % tag
        return

    deltas = []
    for d in xrange(len(occurrences)-1):
        delta = occurrences[d+1] - occurrences[d]
        if delta.days > 2:
            deltas.append(delta.days)

    if print_csv:
        print "date,%s" % field_name
        for d in date_range(occurrences[0], occurrences[len(occurrences)-1]):
            if d in occurrences:
                print str(d) + ",1"
            else:
                print str(d) + ",0"
        return

    print "==============="
    print "Day of cycle distribution"
    previous = None
    for k in sorted(day_of_cycle.keys()):
        if previous:
            if k - previous > 1:
                print ".\n."
        previous = k
        print ("Day %s:" % k).ljust(10), str(day_of_cycle[k]).ljust(4), round(day_of_cycle[k] / float(day_of_cycle_total), 2)
    print "==============="
    print "Weekday distribution"
    for k in sorted(weekdays.keys()):
        print weekday_from_int(k).ljust(5), weekdays[k]
    print "==============="
    print "Total amount of days with %s: " % field_name, len(occurrences)
    print "Average amount of days between %s: " % field_name, average(deltas)
    print "Std dev: ", std_dev(deltas)
    print "Last day with %s: " % field_name, occurrences[len(occurrences)-1]
    print "Days between today and last day with %s: " % field_name, (datetime.datetime.today().date() - occurrences[len(occurrences)-1].date()).days
    print "==============="
コード例 #2
0
ファイル: randomForest.py プロジェクト: albert-001/CS
if __name__ == "__main__":
    # Command line: <data file> <number of trees>.
    # Runs a 5-fold cross-validation of the random forest and prints the
    # accuracy of every fold followed by the standard deviation.
    filename = sys.argv[1]
    num_trees = int(sys.argv[2])

    data, attributes, target_attr = get_data(filename)
    n = len(data)
    num_folds = 5

    accs = []
    for i in range(num_folds):
        # Fold boundaries, computed exactly as before.
        start = int(float(n) / num_folds * i)
        stop = int(float(n) / num_folds * (i + 1))
        valid_data = data[start:stop]  # validation data
        if not valid_data:
            # Fewer records than folds: nothing to evaluate, and the accuracy
            # below would divide by zero.
            continue
        # Training data is everything outside the validation slice.
        train_data = list(data[:start]) + list(data[stop:])
        labels = [d[target_attr] for d in valid_data]

        # Train on the training split only; the original passed the full
        # data set, so the forest had already seen the validation records.
        trees = create_forest(train_data, attributes, target_attr, num_trees)

        # classify
        # NOTE(review): the fallback class is still derived from the
        # validation labels, as in the original -- confirm whether it should
        # come from the training labels instead.
        default_label = vote(labels)
        classes = [
            classify_decision_tree(tree, valid_data, default_label)
            for tree in trees
        ]
        # Majority vote across trees for every validation record.
        classification = [vote(c) for c in zip(*classes)]

        count = sum(1 for x, y in zip(classification, labels) if x == y)
        acc = float(count) / len(classification)
        accs.append(acc)
        print("accuracy: " + str(100 * acc) + "%")
    print("standard deviation: " + str(std_dev(accs)))