def __init__(self, fname_star_data, fname_lines=None):
    """Takes as input a star data file (CSV, required) and a line-list
    data file (CSV, optional) to create a q2 Data object.
    """
    try:
        self.star_data = read_csv(fname_star_data, file_type='stars')
        self.star_data_fname = fname_star_data
        if not self.star_data:
            logger.error('Star data file not read. Data.star_data '
                         'attribute set to None.')
    except Exception:
        self.star_data = None
        self.star_data_fname = None
        logger.error('Star data file not found.')
    if fname_lines:
        try:
            self.lines = read_csv(fname_lines, file_type='lines')
            self.lines_fname = fname_lines
            if not self.lines:
                logger.error('Lines data file not read. Data.lines '
                             'attribute set to None.')
        except Exception:
            self.lines = None
            self.lines_fname = None
            logger.error('Lines file not found.')
    else:
        self.lines = None
        self.lines_fname = None
        logger.warning('No lines data. Will not be able to run MOOG.')
    if self.star_data:
        logger.info('Data object created with star_data attribute.')
    if self.lines:
        logger.info('lines_data attribute added to Data object.')
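# Hedged usage sketch for the constructor above. It assumes this __init__
# belongs to a class named Data (as the log messages suggest) and that
# 'stars.csv' and 'lines.csv' are placeholder file names, not files from the
# original project.
if __name__ == '__main__':
    data = Data('stars.csv', 'lines.csv')
    if data.star_data is not None:
        print('Loaded star data from', data.star_data_fname)
    if data.lines is None:
        print('No line list loaded; MOOG-based routines are unavailable.')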
def Go():
    tb_callback, log_dir = tools.GetTensorboardCallback('Gesture-')
    print(len(X))
    model.fit(X, y, epochs=10000, batch_size=len(X), verbose=1,
              callbacks=[tb_callback])
    testdata = tools.read_csv('data/GestureDeltas_test.csv').values
    testlabels = tools.read_csv('data/GestureLabels_test.csv').values
    # testdata = np.concatenate([testdata, np.square(testdata)], axis=1)
    results = model.evaluate(testdata, testlabels)
    tools.WriteJson(model, "Gesture-Weights")
def process_card():
    card = tools.read_csv('./dirty_data/card.csv')
    card['type'].replace({"golden": "gold"}, inplace=True)  # type: golden -> gold
    card['issued'] = pd.to_datetime(card['issued'])
    return card
def get_zone_count_estimates(location_id, door_count_placement_view_pair,
                             start_date, end_date, adjusted=False):
    """Iterates through .csv files to return a list of (datetime, zone_count)

    ARGS
      location_id: location_id of installation, e.g. '55'
      door_count_placement_view_pair: placement and view id pair, e.g. ('3333230', '0')
      start_date: in format YYYY-MM-DD, <datetime>
      end_date: in format YYYY-MM-DD; range is exclusive '<'. <datetime>
      adjusted: selects between raw and adjusted data <bool>.
        If adjusted is requested but not available, returns raw.

    RETURN
      array of (datetime, zone_count) tuples
    """
    datetime_zone_count_pairs = []
    day = timedelta(days=1)
    curr_day = start_date
    while curr_day < end_date:
        date_str = date2str(curr_day, "%Y-%m-%d")
        fullpath = (ANALYSIS_FOLDER_GLOBAL + str(location_id) + '/' +
                    gtfilename(location_id, door_count_placement_view_pair, curr_day))
        if DEBUG:
            print('get_zone_count_estimates: reading file:', fullpath)
        data = read_csv(fullpath)
        for idx in range(len(data)):
            ts = utc.localize(get_datetime_from_csv_row(data[idx]),
                              is_dst=None).astimezone(utc)
            if start_date <= ts < end_date:
                datetime_zone_count_pairs.append(get_zone_count(data[idx], adjusted))
        curr_day += day
    datetime_zone_count_pairs = np_array(datetime_zone_count_pairs)
    return datetime_zone_count_pairs
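# Hedged usage sketch for get_zone_count_estimates above. The location id and
# placement/view pair are the examples from its docstring; the date range is a
# hypothetical placeholder.
if __name__ == '__main__':
    from datetime import datetime
    start = utc.localize(datetime(2014, 6, 1))
    end = utc.localize(datetime(2014, 6, 8))
    pairs = get_zone_count_estimates('55', ('3333230', '0'), start, end,
                                     adjusted=True)
    for ts, count in pairs:
        print(ts, count)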
def parse_args(usage):
    my_parser = argparse.ArgumentParser(description=usage)
    my_parser.add_argument('Dataset', metavar='dataset_test.csv', type=str,
                           help='the path to dataset_test.csv')
    my_parser.add_argument('Weights', metavar='weights.csv', type=str,
                           help='the path to weights.csv')
    args = my_parser.parse_args()
    data_path = args.Dataset
    weights_path = args.Weights
    check_names(data_path, weights_path)
    data = tools.read_csv(data_path)
    weights = tools.read_csv(weights_path)
    return data, weights
def process_loan():
    loan = tools.read_csv('./dirty_data/loan.csv')
    # keep only durations that are exact multiples of 12
    loan['duration'] = loan['duration'].apply(tools.loan_process_duration)
    loan = loan[loan['duration'].notnull()]
    # keep rows where payments * duration == amount
    loan = loan[loan.apply(lambda row: tools.loan_process_payment(
        row['payments'], row['duration'], row['amount']), axis=1)]
    return loan
def process_client():
    client = tools.read_csv('./dirty_data/client.csv')
    client = client[client['birth_number'].apply(tools.is_birth_legal)]
    # derive birth date and gender from birth_number
    client[['birth_day', 'gender']] = client['birth_number'].apply(tools.card_process_birth)
    client.drop(columns=['birth_number'], inplace=True)
    return client
def process_trans():
    trans = tools.read_csv('./dirty_data/trans.csv')
    trans = trans[trans['balance'].apply(tools.is_int)]  # drop rows where balance is not an integer
    trans['balance'] = pd.to_numeric(trans['balance'])
    trans = trans[trans['bank'].notnull()]  # drop rows with a missing bank
    trans['date'] = pd.to_datetime(trans['date'])  # parse date
    return trans
def process_account():
    account = tools.read_csv('./dirty_data/account.csv')
    account.drop_duplicates(inplace=True)  # remove duplicate rows
    # frequency: POPLATEKMESICNE -> POPLATEK MESICNE
    account['frequency'].replace({"POPLATEKMESICNE": "POPLATEK MESICNE"}, inplace=True)
    # convert data types
    account['account_id'] = account['account_id'].apply(tools.account_process_not_int)
    account['date'] = pd.to_datetime(account['date'])
    return account
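# Hedged sketch of how the process_* cleaners above might be driven together.
# The process_all() helper and the output directory './clean_data/' are
# hypothetical additions, not part of the original scripts.
def process_all():
    cleaners = {
        'card': process_card,
        'loan': process_loan,
        'client': process_client,
        'trans': process_trans,
        'account': process_account,
    }
    for name, func in cleaners.items():
        df = func()
        df.to_csv('./clean_data/%s.csv' % name, index=False)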
def parse_args(usage):
    my_parser = argparse.ArgumentParser(description=usage)
    my_parser.add_argument('Truth', metavar='true answers', type=str,
                           help='the path to the true answers')
    my_parser.add_argument('Predicted', metavar='predicted answers', type=str,
                           help='the path to the predicted answers')
    args = my_parser.parse_args()
    true_path = args.Truth
    predicted_path = args.Predicted
    true = tools.read_csv(true_path)
    predicted = tools.read_csv(predicted_path)
    try:
        true = true['Hogwarts House']
        predicted = predicted['Hogwarts House']
    except Exception:
        tools.error_exit('Failed to find house in data. Is data valid?')
    return true, predicted
def main(args):
    if len(args) == 4:
        import shelve
        from tools import read_csv
        loi_path = args[0]
        feature_filepath = args[1]
        features = args[2].split()
        file = shelve.open(feature_filepath, flag='r')
        if not file:
            return 'File not found'
        import matplotlib
        import matplotlib.pyplot as plt
        plt.ion()
        matplotlib.rcParams.update({'font.size': 4})
        fig, ax = plt.subplots(1, figsize=(4, 3), dpi=200)
        for key, value in file.items():
            for feature in features:
                data = value.time_series[feature].getData()
                x = range(len(data))
                ax.step(x, data, label=feature)
            ax.set_title(value.descr)
        file.close()
        leg = ax.legend(loc='upper right', prop={'size': 4})
        leg.get_frame().set_alpha(0.5)
        ax.set_xlabel('time')
        ax.set_ylabel('feature value')
        ax.grid()
        from ground_truth import get_zone_count
        file = read_csv(loi_path)
        occupancy = [get_zone_count(data, True) for data in file]
        fig, ax = plt.subplots(1, figsize=(4, 3), dpi=200)
        ax.step(range(len(occupancy)), occupancy, label='LOI Occupancy')
        leg = ax.legend(loc='upper right', prop={'size': 4})
        leg.get_frame().set_alpha(0.5)
        ax.set_xlabel('time')
        ax.set_ylabel('occupancy')
        ax.grid()
        input("Press enter to quit")
    else:
        print('syntax is ./plot_feature_and_loi.py loi.csv audio_features.anal feature')
def parse_args(usage):
    my_parser = argparse.ArgumentParser(description=usage)
    my_parser.add_argument('Dataset', metavar='dataset_test.csv', type=str,
                           help='the path to dataset_test.csv')
    my_parser.add_argument('-a', '--all', action='store_true',
                           help='Plot all courses. Include courses with homogeneous '
                                'score distribution between houses, and courses that '
                                'are similar. Default: only plot courses useful for '
                                'logistic regression')
    args = my_parser.parse_args()
    path = args.Dataset
    plot_all = args.all
    data = tools.read_csv(path)
    return data, plot_all
def parse_args(usage):
    my_parser = argparse.ArgumentParser(description=usage)
    my_parser.add_argument('Dataset', metavar='dataset', type=str,
                           help='the path to dataset')
    my_parser.add_argument('-t', '--timer', action='store_true',
                           help='Display time taken. Default false')
    my_parser.add_argument('-c', '--cost', action='store_true',
                           help='Display cost graph, prediction error over training period. '
                                'Default false')
    args = my_parser.parse_args()
    path = args.Dataset
    data = tools.read_csv(path)
    timer = args.timer
    cost = args.cost
    return data, timer, cost
# pulling several files into the project
# csv
from tools import read_csv

filename = "test2.csv"
data = read_csv(filename)
assert len(data[0]) > 1, "One column!!!"

# from tools import read_csv, write_csv
#
# filename = "test.csv"
# data = read_csv(filename)
# print(data)
# write_csv("test2.csv", data)

# with open(filename, 'r', encoding="utf-8") as csv_file:
#     data = []
#     reader = csv.DictReader(csv_file)
#     for row in reader:
#         data.append(dict(row))
#
# print(data)
#
# data.append({'Name': 'Tod', 'Age': '30', 'Value': '12.4', 'Text': 'qwerty'})
#
# with open(filename, 'w', encoding="utf-8") as csv_file:
#     fieldnames = data[0].keys()
#     writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
#     writer.writeheader()
#     writer.writerows(data)
import tools

data = tools.read_csv(tools.path_vendas)
id = tools.write_csv(tools.path_vendas, data)
# print the service-order number and tell the user to keep it for tracking the request
print("sua ordem de serviço é: %05d\nGuarde o número, pois somente com ele, "
      "você conseguirá acompanhar o seu chamado" % id)
def process_order():
    order = tools.read_csv('./dirty_data/order.csv')
    order.drop_duplicates(inplace=True)
    return order
def process_district():
    district = tools.read_csv('./dirty_data/district.csv')
    district.drop_duplicates(inplace=True)
    return district
def GetData():
    data = tools.read_csv('SpiralData.csv').values[:, :-1]
    labels = tools.read_csv('SpiralLables.csv').values
    data = np.concatenate([data, AddNoise(data)], axis=0)
    labels = np.concatenate([labels, labels], axis=0)
    return data, labels
def GetData():
    data = tools.read_csv('data/GroupedDeltas.csv')
    labels = tools.read_csv('data/GroupedLabels.csv')
    return data, labels
def BuildData():
    data = tools.read_csv('data/GestureDeltasTrain.csv').values
    labels = tools.read_csv('data/GestureLabelsTrain.csv').values
    # append the correction sets 00..07
    for i in range(8):
        data = np.concatenate(
            [data, tools.read_csv('data/CorrectionsDeltas%02d.csv' % i).values], axis=0)
        labels = np.concatenate(
            [labels, tools.read_csv('data/CorrectionsLabels%02d.csv' % i).values], axis=0)
    # augment by appending rescaled copies of the accumulated deltas
    for scale in (0.5, 0.75, 1.25):
        data = np.concatenate([data, data * scale], axis=0)
        labels = np.concatenate([labels, labels], axis=0)
    # shuffle data and labels with the same permutation (numpy used here in
    # place of tf.random.shuffle so both arrays stay aligned)
    perm = np.random.RandomState(999).permutation(len(data))
    data = data[perm]
    labels = labels[perm]
    np.savetxt('data/GroupedDeltas.csv', data, delimiter=',')
    np.savetxt('data/GroupedLabels.csv', labels, delimiter=',')
    return data, labels
# csv
import random

import tools

data = tools.read_csv("persons.csv")

for row in data[1:]:
    row[1] = int(row[1]) + 1

data[0].append("Education")
for row in data[1:]:
    row.append(random.choice([0, 1]))

tools.write_csv("test_2.csv", data)

# header = data.pop(0)
# for row in data:
#     print(row[1])
# data = [header] + data

filename = "persons.csv"
data = tools.read_dict_csv(filename=filename)
for row in data:
    row["Education"] = random.choice([0, 1])
tools.write_dict_csv("persons_2.csv", data)
print(data)
def conclude_call(id):
    archive = tools.read_csv(tools.path_vendas)
    for row in archive:
        if row[3] == id:
            print(row[1])