コード例 #1
0
# Keyword filters passed to WikiItem.objects.filter(); one dict per data subset.
qs_kwargs_list = [
    # quick test (small portion of database)
    {'id__gt': 0, 'id__lt': 1000},
]
# each row is a different tree in the forest
include_fields_list = [
    ['wikiitem__modified', 'wikiitem__title'],
    ['wikiitem__modified', 'wikiitem__title'],
]

# Separator lines reused by the report printed for every tree.
heavy_rule = '=' * 80
light_rule = '-' * 80

# One tree is built for every (target field, row filter, indicator set)
# combination; the (i, j, k) position of that combination names the output files.
# `fields` and `tree_list` are expected to be defined earlier in the script.
for i, field in enumerate(fields):
    for j, qs_kwargs in enumerate(qs_kwargs_list):
        for k, include_fields in enumerate(include_fields_list):
            # Single-argument parenthesised print behaves exactly like the
            # Python 2 print statement; print('') emits the blank line.
            print('')
            print(heavy_rule)
            print("Attempt to predict: %s" % field)
            print("Limit database to: %s" % qs_kwargs)
            print("Indicator variables: %s" % include_fields)

            qs = WikiItem.objects.filter(**qs_kwargs)
            print("Fitting to %s records." % qs.count())
            print(light_rule)

            # The target field is appended to the indicator list handed to
            # build_tree, in addition to being passed as `field`.
            tree = build_tree(
                qs,
                field=field,
                include_fields=include_fields + [field],
            )
            tree_list += [tree]
            print_tree(tree)
            print(light_rule)

            # Persist the tree three ways: rendered image, pickle, and text dump.
            index = (i, j, k)
            draw_tree(tree, 'tree_%s_%s_%s.jpg' % index)
            with open('tree_%s_%s_%s.pickle' % index, 'wb') as fpout:
                pickle.dump(tree, fpout)
            # NOTE(review): text is written through a binary-mode handle; that
            # works on Python 2 str but would need 'w' on Python 3 — confirm.
            with open('tree_%s_%s_%s.txt' % index, 'wb') as fpout:
                fpout.write(represent_tree(tree))
コード例 #2
0
# Row totals (N_*) and distinct case_number totals (UN*) for each model.
UN = CaseMaster.objects.values('case_number').distinct().count()
N_ce = CaseExchange.objects.count()
UN_ce = CaseExchange.objects.values('case_number').distinct().count()
N_hdtv = CaseHDTVHeader.objects.count()
UN_hdtv = CaseHDTVHeader.objects.values('case_number').distinct().count()


# Cross-check count_unique against the database counts: the number of entries
# must equal the distinct total and the entries must sum to the row total.
exchange_case_numbers = CaseExchange.objects.values('case_number')
un = count_unique(exchange_case_numbers, 'case_number')
assert len(un.values()) == UN_ce
assert sum(un.values()) == N_ce


# Tree predicting service_calls from the cases below case number 4,000,000.
qs = CaseHDTVHeader.objects.filter(case_number__lt=4000000)
ex = qs.all()[0]
# Bare attribute access with the result discarded; presumably left over from
# interactive inspection of the first record.
ex.service_calls
service_calls_tree = build_tree(
    qs,
    field='service_calls',
    ignore_fields=('id', 'case_number'),
)
print_tree(service_calls_tree)
# Recorded tree (presumably the output of the call above):
# dispatch_status:Completed ?
#  T-> date_time:2008-09-15 12:25:34.270000?
#    T-> {1: 1}
#    F-> date_time:2008-07-09 08:49:36.437000?
#      T-> {0: 1}
#      F-> {None: 0}
#  F-> {None: 0}

# Tree predicting dispatch_status from the cases below case number 2,000,000,
# this time also excluding service_calls from the candidate fields.
qs = CaseHDTVHeader.objects.filter(case_number__lt=2000000)
ex = qs.all()[0]
ex.service_calls
dispatch_status_tree = build_tree(
    qs,
    field='dispatch_status',
    ignore_fields=('id', 'case_number', 'service_calls'),
)
print_tree(dispatch_status_tree)
# NOTE(review): the recorded tree below repeats the one above; it may be a
# stale paste rather than the output of this call — confirm.
# dispatch_status:Completed ?
#  T-> date_time:2008-09-15 12:25:34.270000?
#    T-> {1: 1}
#    F-> date_time:2008-07-09 08:49:36.437000?
コード例 #3
0
# Gather total row counts (N*) and distinct case_number counts (UN*) per model.
N = CaseMaster.objects.count()
UN = CaseMaster.objects.values('case_number').distinct().count()
N_ce = CaseExchange.objects.count()
UN_ce = CaseExchange.objects.values('case_number').distinct().count()
N_hdtv = CaseHDTVHeader.objects.count()
UN_hdtv = CaseHDTVHeader.objects.values('case_number').distinct().count()

# Sanity check: count_unique must report one entry per distinct case number,
# and those entries must add up to the CaseExchange row count.
un = count_unique(CaseExchange.objects.values('case_number'), 'case_number')
assert len(un.values()) == UN_ce
assert sum(un.values()) == N_ce

# First experiment: predict service_calls for case numbers under 4,000,000.
qs = CaseHDTVHeader.objects.filter(case_number__lt=4000000)
ex = qs.all()[0]
# The value is read and thrown away; presumably a remnant of interactive use.
ex.service_calls
ignored_for_service_calls = ('id', 'case_number')
print_tree(build_tree(qs, field='service_calls', ignore_fields=ignored_for_service_calls))
# Recorded tree (presumably printed by the call above):
# dispatch_status:Completed ?
#  T-> date_time:2008-09-15 12:25:34.270000?
#    T-> {1: 1}
#    F-> date_time:2008-07-09 08:49:36.437000?
#      T-> {0: 1}
#      F-> {None: 0}
#  F-> {None: 0}

# Second experiment: predict dispatch_status for case numbers under 2,000,000,
# additionally keeping service_calls out of the candidate fields.
qs = CaseHDTVHeader.objects.filter(case_number__lt=2000000)
ex = qs.all()[0]
ex.service_calls
ignored_for_dispatch_status = ('id', 'case_number', 'service_calls')
print_tree(build_tree(qs, field='dispatch_status', ignore_fields=ignored_for_dispatch_status))
# dispatch_status:Completed ?