def csv2pqt(year, month, entries, bkt, dst_root, overwrite, public=False, start=None):
    """Convert one month of CSV entries into ``<YYYYMM>.parquet``.

    When ``start`` (a "YYYYMM" string) is given, months strictly before it
    are skipped and a skip notice is returned instead of converting.
    Otherwise the ``convert_file`` result message is returned.
    """
    name = '%d%02d' % (year, month)
    if start:
        # Lower bound as a comparable (year, month) tuple.
        first = (int(start[:4]), int(start[4:]))
        if (year, month) < first:
            return 'Skipping month %s < %d%02d' % (name, first[0], first[1])
    parquet_name = f'{name}.parquet'
    dst_key = f'{dst_root}/{parquet_name}' if dst_root else parquet_name
    result = convert_file(
        normalize_csvs,
        bkt=bkt,
        entries=entries,
        dst_key=dst_key,
        overwrite=overwrite,
        public=public,
    )
    return result.get('msg')
def original_to_csv(src_bkt, zip_key, dst_bkt, error='warn', overwrite=False, dst_root=None):
    """Convert one original tripdata ``.zip`` object into a CSV in S3.

    The destination key is derived from the source file name via the
    module-level ``rgx`` pattern; names that do not match raise ``BadKey``.
    Returns the ``convert_file`` result message (empty string if absent).
    """
    def dst_key(src_name):
        # Derive the canonical destination name from the source name.
        m = match(rgx, src_name)
        if not m:
            raise BadKey(src_name)
        _, ext = splitext(src_name)
        assert ext == '.zip'
        # normalize the dst path; a few src files have typos/inconsistencies
        base = '%s%s%s-citibike-tripdata' % (m['JC'] or '', m['year'], m['month'])
        csv_name = f'{base}.csv'
        return csv_name if dst_root is None else f'{dst_root}/{csv_name}'

    result = convert_file(
        to_csv,
        src_bkt=src_bkt,
        src_key=zip_key,
        dst_bkt=dst_bkt,
        dst_key=dst_key,
        error=error,
        overwrite=overwrite,
    )
    return result.get('msg', '')
def main(args):
    """Validate parsed CLI arguments and run the requested conversion(s).

    Verifies that ffmpeg is available, that both format strings look like
    ``.ext``, and that the target is an existing directory (dir mode) or
    existing files (file mode), then delegates to ``utils``.

    Raises argparse.ArgumentTypeError on any invalid argument.
    """
    ext_pattern = re.compile(r'^\.\w+$')
    # ensure that the program ffmpeg is installed on the system
    utils.check_ffmpeg_installed()

    is_dir_mode = args.type == 'dir'

    source_format = args.from_format
    if ext_pattern.match(source_format) is None:
        raise argparse.ArgumentTypeError(
            'The from format should be a . '
            'followed by alphabetic characters, for example, '
            '.avi .mkv')

    target_format = args.to
    if ext_pattern.match(target_format) is None:
        raise argparse.ArgumentTypeError(
            'The target format is not valid, should be a '
            '. followed by alphabetic characters, for example, '
            '.mp4')

    targets = args.target
    if is_dir_mode:
        if not os.path.isdir(targets[0]):
            raise argparse.ArgumentTypeError(
                '\'{}\' is not a directory'.format(targets[0]))
    else:
        for path in targets:
            if not os.path.isfile(path):
                raise argparse.ArgumentTypeError(
                    '\'{}\' is not a file'.format(path))

    remove_sources = args.delete_old
    print('Converting files...')
    if is_dir_mode:
        utils.convert_directory(targets[0], source_format, target_format,
                                delete_original=remove_sources)
    else:
        for path in targets:
            utils.convert_file(path, source_format, target_format,
                               delete_original=remove_sources)
def put(name):
    """Compute and write one artifact named ``name`` to S3, printing progress.

    Builds the destination key under the enclosing scope's ``dst_root`` (if
    set) and bucket, then delegates to ``convert_file`` and prints its
    result message.
    """
    key = f'{dst_root}/{name}' if dst_root else name
    target = f's3://{dst_bucket}/{key}'
    print(f'Computing: {target}')
    outcome = convert_file(
        write,
        dst=target,
        fmt=fmt,
        public=public,
        overwrite=overwrite,
    )
    print(outcome.get('msg'))
def run_convert(csv_input_file, export_file=None):
    """
    Converts the CSV data into a JSON format.
    Default behaviour is to print to command line.
    Outputs to file if specified.
    """
    print('Started: CSV convert - {0}'.format(datetime.now()))
    with open(csv_input_file, encoding='utf-8', errors='ignore') as csv_file:
        json_records = utils.convert_file(csv_file)
        # Stay inside the `with` while consuming json_records, in case
        # convert_file returns a lazy reader over csv_file.
        if export_file:
            # Write UTF-8 explicitly so output round-trips the input decoding
            # instead of depending on the platform default encoding.
            with open(export_file, mode='w', encoding='utf-8') as json_file:
                json_file.write(utils.create_json_strings(json_records))
        else:
            print(utils.create_json_strings(json_records))
    print('Finished: CSV convert - {0}'.format(datetime.now()))
# NOTE(review): collapsed fragment of a static-site build script — converts
# .md/.markdown tree nodes through a theme template, then optionally syncs
# the result over FTP using key=value pairs parsed from settings.txt.
# The fragment is truncated mid-`for`-loop at the end, so the text is kept
# verbatim below. "sussessfully" in the success message is a typo
# ("successfully") to fix once the full file is in view; the bare
# `except OSError: pass` presumably tolerates a pre-existing build folder.
# build markdown files try: os.mkdir(build_path) except OSError: # folder already exists pass # set theme if not user-defined theme.check(bin_path, base_path) # load theme template = theme.load_theme(base_path) for node in site_tree.iter(): if node.ext in (".md", ".markdown"): utils.convert_file(build_path, node, template) print("Website built sussessfully.") if args.sync: # load ftp settings # ok, it's pretty difficult to read i know :) with open(os.path.join(base_path, "settings.txt")) as set_file: raw = [line.strip() for line in set_file.readlines()] settings = dict([[t.strip() for t in line.split("=")] for line in raw if line != ""]) # open connection and upload files... with ftplib.FTP(settings["host"], settings["user"], settings["password"]) as ftp: for node in site_tree.iter():
def main(input_file_name: "Input file name", output_file_name: "Output file name"):
    """Run the conversion: hand ``gen_line`` and both file names to ``convert_file``."""
    convert_file(gen_line, input_file_name, output_file_name)
def save_output(data, accuracy, file_name):
    """Write classification accuracy and per-row predictions to <file_name>.txt.

    Each row's first element is rewritten in place as a
    "Predicted label: X" string before the row is written out.
    """
    # `with` guarantees the handle is closed even on error; the original
    # opened the file and never closed it (leaked handle / unflushed buffer).
    with open("{}.txt".format(file_name), "w+") as output:
        output.write("Naive Bayes accuracy: {}%\n".format(accuracy * 100))
        for row in data:
            row[0] = "Predicted label: {}".format(row[0])
            output.write("{}\n".format(row))


if __name__ == '__main__':
    TRAINING_FILE_PATH = sys.argv[1]
    TESTING_FILE_PATH = sys.argv[2]
    OUTPUT_FILE = sys.argv[3]
    # convert the training data
    training_data = convert_file(TRAINING_FILE_PATH)
    # get the relevant data for testing
    label_probs, column_probs, evidence_probs = train_naive_bayes(
        training_data)
    # now you can classify!
    test_data = convert_file(TESTING_FILE_PATH)
    classified_data = classify_data_set(label_probs, column_probs,
                                        evidence_probs, test_data)
    accuracy = test_accuracy(classified_data, test_data)
    # Parenthesized print prints identically on Python 2 and 3; the original
    # bare `print "..."` statement is Python-2-only syntax.
    print("Accuracy: {}%".format(accuracy * 100))
    # save the output
    save_output(classified_data, accuracy, OUTPUT_FILE)
# NOTE(review): near-duplicate of the site-build fragment above elsewhere in
# this paste — markdown-to-template conversion followed by an optional FTP
# sync driven by settings.txt. Truncated mid-`if` at the end, so the text is
# kept verbatim. "sussessfully" is a typo ("successfully"), and
# `node.parent != None` should be `node.parent is not None` once the full
# file can be edited safely.
# build markdown files try: os.mkdir(build_path) except OSError: # folder already exists pass # set theme if not user-defined theme.check(bin_path, base_path) # load theme template = theme.load_theme(base_path) for node in site_tree.iter(): if node.ext in (".md", ".markdown"): utils.convert_file(build_path, node, template) print("Website built sussessfully.") if args.sync: # load ftp settings # ok, it's pretty difficult to read i know :) with open(os.path.join(base_path, "settings.txt")) as set_file: raw = [line.strip() for line in set_file.readlines()] settings = dict([[t.strip() for t in line.split("=")] for line in raw if line != ""]) # open connection and upload files... with ftplib.FTP(settings["host"], settings["user"], settings["password"]) as ftp: for node in site_tree.iter(): if isinstance(node, tree.Folder) and node.parent != None:
# NOTE(review): tail of a k-means driver script (Python 2 — bare `print`
# statements and a lazily-evaluated `filter`). The leading
# `iterations += 1` / `return centroids, data_points, iterations` belong to a
# function whose `def` starts before this visible chunk, and the fragment is
# also truncated after the k_means call, so the text is preserved verbatim.
# LABEL_LOCATION is presumably a module-level constant marking the label
# column — confirm against the full file.
iterations += 1 return centroids, data_points, iterations if __name__ == '__main__': # SYSTEM INPUT VARIABLES DATA_FILE_PATH = sys.argv[1] INPUT_K = int(sys.argv[2]) OUTPUT_FILE = sys.argv[3] print "Running k means on {}!".format(DATA_FILE_PATH) # Convert original data into a 2d array data = convert_file(DATA_FILE_PATH) # Remove the label from each data # only for use with the original file unlabeled_data = [row[:LABEL_LOCATION] for row in data] # Converts each string data element into a float, removes any empty rows numerized_data = filter(lambda x: len(x) > 0, [[float(i) for i in row] for row in unlabeled_data]) # store sse's for post analyses print "\nTESTING ON K = {}".format(INPUT_K) # Run k means on the numerized data centroids, results, iterations = k_means(INPUT_K, numerized_data)
import cv2
import numpy as np
import utils as ut
import operator
from matplotlib import pyplot as plt
from functools import reduce
from sklearn.cluster import KMeans
import os

# Pre-process the source images before analysis.
ut.convert_file()

# Leaf photo (HSV colour space) and the crop rectangle isolating the leaf.
img = cv2.imread('/home/alvaro/Coffee_Recognize_API/img_hsv/fitossanidade.png')
rect = (188, 224, 382, 687)
clusters = 5

# Cluster the background-masked leaf into colour groups; `porcentagens`
# holds the per-cluster share.
bar, porcentagens = ut.toKmeans(ut.black_back(img, rect), clusters)

# Share of the last cluster relative to the total is the contamination
# estimate; the summed shares give the overall leaf-disease figure.
total = reduce(operator.add, porcentagens)
leaf_disease = round(total, 2)
contamination = round((porcentagens[-1] * 100) / total, 2)

results = [leaf_disease, contamination]
print(results)