def open_csv(path, delimiter=',', header=True, col_names=None,
             parse_datetimes=None):
    """Creates a structured array from a local .csv file

    Parameters
    ----------
    path : str
        path of the csv file
    delimiter : str
        Character used to delimit csv fields
    header : bool
        If True, assumes the first line of the csv has column names
    col_names : list of str or None
        If header is False, this list will be used for column names
    parse_datetimes : list of col names or None
        Columns that should be interpreted as datetimes. None (the
        default) is treated as an empty list.

    Returns
    -------
    numpy.ndarray
        structured array corresponding to the csv

    If header is False and col_names is None, diogenes will assign
    arbitrary column names

    """
    # Build a fresh list on every call instead of sharing one mutable
    # default object between all calls.
    if parse_datetimes is None:
        parse_datetimes = []
    # NOTE(review): the 'U' (universal newlines) flag is accepted by
    # Python 2 and by Python 3 up to 3.10; Python 3.11 rejects it. Switch
    # to plain 'r' if this module is ever ported.
    with open(path, 'rU') as fin:
        return open_csv_as_sa(fin, delimiter, header, col_names,
                              parse_datetimes=parse_datetimes)
def open_csv_url(url, delimiter=',', header=True, col_names=None,
                 parse_datetimes=None):
    """Creates a structured array from a url

    Parameters
    ----------
    url : str
        url of the csv file
    delimiter : str
        Character used to delimit csv fields
    header : bool
        If True, assumes the first line of the csv has column names
    col_names : list of str or None
        If header is False, this list will be used for column names
    parse_datetimes : list of col names or None
        Columns that should be interpreted as datetimes. None (the
        default) is treated as an empty list.

    Returns
    -------
    numpy.ndarray
        structured array corresponding to the csv

    If header is False and col_names is None, diogenes will assign
    arbitrary column names

    """
    # Build a fresh list on every call instead of sharing one mutable
    # default object between all calls.
    if parse_datetimes is None:
        parse_datetimes = []
    fin = urllib2.urlopen(url)
    try:
        return open_csv_as_sa(fin, delimiter, header, col_names,
                              parse_datetimes=parse_datetimes)
    finally:
        # Close the handle even when parsing raises, so it is never leaked.
        fin.close()
def run_csv(fin, uid_feature, label_feature, clfs=DBG_std_clfs):
    """Turn a CSV into an Experiment, then register that Experiment.

    The CSV is read with open_csv_as_sa. The ``label_feature`` column
    supplies the labels and is removed from the remaining data before the
    Experiment is built with ``clfs``. The Experiment is then passed to
    register_exp together with ``uid_feature``.
    """
    table = open_csv_as_sa(fin)
    # Pull the labels out before dropping that column from the data.
    target = table[label_feature]
    features = remove_cols(table, label_feature)
    register_exp(Experiment(features, target, clfs=clfs), uid_feature)
def run_csv(fin, uid_feature, label_feature):
    """Run an Experiment on a CSV and register a model for every run.

    The CSV is read with open_csv_as_sa. The ``label_feature`` column
    supplies the labels and is removed from the remaining data. An
    Experiment using DBG_std_clfs is built and run, stored in
    last_experiments under the current user's id, and the user's existing
    models are cleared. register_model is then called once per run found
    under each trial's ``runs``, with ``uid_feature`` passed through.
    """
    table = open_csv_as_sa(fin)
    target = table[label_feature]
    features = remove_cols(table, label_feature)

    experiment = Experiment(features, target, clfs=DBG_std_clfs)
    experiment.run()
    last_experiments[current_user.id] = experiment
    clear_models(current_user.id)

    # Lazily walk trials -> subsets -> runs; runs are produced one at a
    # time, in the same order as three nested for-loops would visit them.
    every_run = (
        single_run
        for trial in experiment.trials
        for subset in trial.runs
        for single_run in subset
    )
    for run in every_run:
        register_model(
            current_user.id,
            run.clf,
            dt.now(),
            run.M[run.train_indices],
            run.M[run.test_indices],
            run.labels[run.train_indices],
            run.labels[run.test_indices],
            run.col_names,
            uid_feature,
        )
def run_csv(fin, uid_feature, label_feature):
    """Build and run an Experiment from a CSV, then register its models.

    Steps, in order:

    1. Read the CSV with open_csv_as_sa.
    2. Take the ``label_feature`` column as labels and remove it from the
       rest of the data.
    3. Build an Experiment with DBG_std_clfs and run it.
    4. Store the Experiment in last_experiments under the current user's
       id and clear that user's models.
    5. Call register_model once for every run in every subset of every
       trial, passing ``uid_feature`` through.
    """
    data = open_csv_as_sa(fin)
    labels_col = data[label_feature]
    data_without_labels = remove_cols(data, label_feature)

    experiment = Experiment(data_without_labels, labels_col,
                            clfs=DBG_std_clfs)
    experiment.run()

    last_experiments[current_user.id] = experiment
    clear_models(current_user.id)

    for trial in experiment.trials:
        for subset in trial.runs:
            for run in subset:
                # Gather the arguments left to right, in the order
                # register_model expects them.
                model_args = (
                    current_user.id,
                    run.clf,
                    dt.now(),
                    run.M[run.train_indices],
                    run.M[run.test_indices],
                    run.labels[run.train_indices],
                    run.labels[run.test_indices],
                    run.col_names,
                    uid_feature,
                )
                register_model(*model_args)