def parse_args(ap):
    """Parse the command line and normalize the result.

    The arguments are parsed with u.parse_args(ap).  Afterwards:

    * if two entries of args.inputs share a basename, the problem is
      reported through ap.error() (presumably an argparse parser, in which
      case that call prints usage and exits -- confirm against caller);
    * every entry of args.inputs is replaced by its absolute path;
    * args.sortdir falls back to 'tmp' when it is None.

    Returns the adjusted args object.
    """
    args = u.parse_args(ap)

    # Basenames must not collide: collapsing them into a set drops
    # duplicates, so any size difference means at least one repeat.
    basenames = {os.path.basename(path) for path in args.inputs}
    if len(basenames) != len(args.inputs):
        ap.error('input file basenames must be unique')

    # Absolutize only after the uniqueness check above.
    args.inputs = list(map(os.path.abspath, args.inputs))

    # Default sort directory.
    if args.sortdir is None:
        args.sortdir = 'tmp'

    return args
# NOTE(review): the line below is whitespace-collapsed: several statements
# are run together on one physical line, and everything after its first "#"
# is lexed as a comment. The original line breaks and indentation must be
# restored before it can run. The code is left untouched here because the
# enclosing "def", the construct that binds `freq`, and step 1 are not
# visible from this chunk.
#
# What the visible statements do, in order:
#
# * Tail of a function (presumably executed once per `freq` -- TODO confirm
#   against the missing loop header): convert dfs[freq]'s index with
#   to_period(freq); strip a trailing "$norm" from every column name;
#   linearly interpolate NaNs along axis 0 and set any that remain to 0;
#   inner-align the frame with `eg` along axis 0 and assert that the two
#   indexes are then equal.
# * Still in that function: build `vs`, which maps each key `ob` of g.truth
#   (visited in sorted order) to the columns of dfs[<frequency name of that
#   key's series index>] for which relevant_p(ob, column, args.distance) is
#   true; log the number of selected columns per key; return `vs`.
# * Script bootstrap, only when run as __main__: parse arguments, copy the
#   attribute named "in" to args.in_ (since `args.in` cannot be written as
#   an attribute access), call u.configure(args.config), initialise logging
#   under the name 'expmt', and call main().
#
# NOTE(review): DataFrame.select() was deprecated in pandas 0.21 and removed
# in 1.0; if this must run on a current pandas, the documented replacement
# is df.loc[:, df.columns.map(predicate)] -- confirm the target version.
dfs[freq].index = dfs[freq].index.to_period(freq) dfs[freq].rename(columns=lambda c: re.sub(r'\$norm$', '', c), inplace=True) # 2. Clean up any NANs. We interpolate anything in the middle and change # boundary NANs to zero. Note that the boundaries are fairly well # outside the study period, so that should have minimal effect. dfs[freq].interpolate(method='linear', axis=0, inplace=True) dfs[freq].fillna(0, inplace=True) # 3. Trim the DataFrames to the study period. This doesn't have any effect, # since we trim to each test later, but it saves memory. (dfs[freq], _) = dfs[freq].align(eg, axis=0, join='inner') assert (dfs[freq].index.equals(eg.index)) # 4. Build a DataFrame for each disease. This duplicates some vectors, but # not enough to be a worry. vs = dict() for (ob, ts) in sorted(g.truth.items()): freq = ts.index.freq.name dist = args.distance vs[ob] = dfs[freq].select(lambda c: relevant_p(ob, c, dist), axis=1) l.info(' %-15s %3d articles' % (ob + ':', len(vs[ob].columns))) return vs ### Bootstrap ### if (__name__ == '__main__'): args = u.parse_args(ap) args.in_ = getattr(args, 'in') # foo.in is a syntax error u.configure(args.config) u.logging_init('expmt') main()
# NOTE(review): the next four lines are the tail of a definition whose header
# lies outside this chunk (`clf`, `sizes` and `tweets` are bound there).
# Only the line breaks are restored; the statements are unchanged and must
# be re-indented to match the enclosing body.
clf_times = []
for size in sizes:
    clf_times.append('%.5f' % time_it(clf, tweets[:size]))
# One tab-separated row: last 15 characters of the classifier's module name,
# then one formatted timing per size.
l.info(clf.__module__[-15:] + '\t' + '\t'.join(clf_times))


def read_tsvs(filenames):
    """Collect tweet texts from the given files, in order.

    Each file is opened with tweet.Reader and the .text of every item it
    yields is appended to the result.  Reading stops as soon as the number
    of collected texts exceeds the module-level args.test_size, so for an
    integer limit the result holds at most args.test_size + 1 texts.  If
    the files run out first, everything read so far is returned.
    """
    texts = []
    for path in filenames:
        for tw in tweet.Reader(path):
            texts.append(tw.text)
            # The list length is the running count of texts read so far.
            if len(texts) > args.test_size:
                return texts
    return texts


### Bootstrap ###

try:
    args = u.parse_args(ap)
    u.logging_init('clsbmk')
    if (__name__ == '__main__'):
        main()
except testable.Unittests_Only_Exception:
    testable.register('')