Example #1
0
def csv2pqt(year,
            month,
            entries,
            bkt,
            dst_root,
            overwrite,
            public=False,
            start=None):
    """Convert one month's CSV entries into a Parquet object.

    If ``start`` (a ``YYYYMM`` string) is given and the month precedes it,
    the conversion is skipped and a "Skipping" message is returned.
    Otherwise the ``msg`` field of ``convert_file``'s result is returned.
    """
    name = f'{year}{month:02d}'

    if start:
        threshold = (int(start[:4]), int(start[4:]))
        if (year, month) < threshold:
            return 'Skipping month %s < %d%02d' % ((name,) + threshold)

    # Place the parquet under dst_root when one is configured.
    dst_key = f'{dst_root}/{name}.parquet' if dst_root else f'{name}.parquet'

    result = convert_file(
        normalize_csvs,
        bkt=bkt,
        entries=entries,
        dst_key=dst_key,
        overwrite=overwrite,
        public=public,
    )
    return result.get('msg')
Example #2
0
def original_to_csv(src_bkt,
                    zip_key,
                    dst_bkt,
                    error='warn',
                    overwrite=False,
                    dst_root=None):
    """Convert one original tripdata ``.zip`` into a normalized CSV key.

    Returns the ``msg`` field of ``convert_file``'s result ('' when absent).
    Raises BadKey when the source name does not match the expected pattern.
    """

    def dst_key(src_name):
        # Parse year/month (and optional JC prefix) out of the zip name.
        parsed = match(rgx, src_name)
        if parsed is None:
            raise BadKey(src_name)
        _, ext = splitext(src_name)
        assert ext == '.zip'

        # normalize the dst path; a few src files have typos/inconsistencies
        base = '%s%s%s-citibike-tripdata' % (parsed['JC'] or '',
                                             parsed['year'],
                                             parsed['month'])
        prefix = '' if dst_root is None else f'{dst_root}/'
        return f'{prefix}{base}.csv'

    return convert_file(
        to_csv,
        src_bkt=src_bkt,
        src_key=zip_key,
        dst_bkt=dst_bkt,
        dst_key=dst_key,
        error=error,
        overwrite=overwrite,
    ).get('msg', '')
Example #3
0
def main(args):
    """Validate parsed CLI arguments, then convert the target file(s).

    Raises argparse.ArgumentTypeError when a format string or a target
    path fails validation.
    """
    ext_pattern = re.compile(r'^\.\w+$')

    # ensure that the program ffmpeg is installed on the system
    utils.check_ffmpeg_installed()

    convert_dir = args.type == 'dir'

    from_format = args.from_format
    if not ext_pattern.match(from_format):
        raise argparse.ArgumentTypeError(
            'The from format should be a . '
            'followed by alphabetic characters, for example, '
            '.avi .mkv')

    to = args.to
    if not ext_pattern.match(to):
        raise argparse.ArgumentTypeError(
            'The target format is not valid, should be a '
            '. followed by alphabetic characters, for example, '
            '.mp4')

    # Directory mode validates one directory; file mode validates every path.
    target = args.target
    if convert_dir:
        if not os.path.isdir(target[0]):
            raise argparse.ArgumentTypeError(
                f"'{target[0]}' is not a directory")
    else:
        for path in target:
            if not os.path.isfile(path):
                raise argparse.ArgumentTypeError(f"'{path}' is not a file")

    delete_old = args.delete_old

    print('Converting files...')
    if convert_dir:
        utils.convert_directory(target[0],
                                from_format,
                                to,
                                delete_original=delete_old)
    else:
        for path in target:
            utils.convert_file(path, from_format, to,
                               delete_original=delete_old)
Example #4
0
        def put(name):
            """Compute and upload one object, reporting the result message."""
            # Nest the key under dst_root when one was configured.
            dst_key = f'{dst_root}/{name}' if dst_root else name

            dst = f's3://{dst_bucket}/{dst_key}'
            print(f'Computing: {dst}')

            outcome = convert_file(
                write,
                dst=dst,
                fmt=fmt,
                public=public,
                overwrite=overwrite,
            )
            print(outcome.get('msg'))
Example #5
0
def run_convert(csv_input_file, export_file=None):
    """Convert CSV data into JSON.

    Writes the JSON to ``export_file`` when one is given; otherwise the
    JSON is printed to stdout. Progress timestamps bracket the run.
    """
    print(f'Started: CSV convert - {datetime.now()}')

    # Undecodable bytes in the input are ignored rather than raising.
    with open(csv_input_file, encoding='utf-8', errors='ignore') as csv_file:
        records = utils.convert_file(csv_file)

        if export_file:
            with open(export_file, mode='w') as json_file:
                json_file.write(utils.create_json_strings(records))
        else:
            print(utils.create_json_strings(records))

    print(f'Finished: CSV convert - {datetime.now()}')
Example #6
0
        # build markdown files
        try:
            os.mkdir(build_path)
        except OSError:
            # folder already exists
            pass

        # set theme if not user-defined
        theme.check(bin_path, base_path)
        # load theme
        template = theme.load_theme(base_path)

        for node in site_tree.iter():
            if node.ext in (".md", ".markdown"):
                utils.convert_file(build_path, node, template)

        print("Website built sussessfully.")

    if args.sync:
        # load ftp settings
        # ok, it's pretty difficult to read i know :)
        with open(os.path.join(base_path, "settings.txt")) as set_file:
            raw = [line.strip() for line in set_file.readlines()]
            settings = dict([[t.strip() for t in line.split("=")]
                             for line in raw if line != ""])

        # open connection and upload files...
        with ftplib.FTP(settings["host"], settings["user"],
                        settings["password"]) as ftp:
            for node in site_tree.iter():
Example #7
0
def main(input_file_name: "Input file name",
         output_file_name: "Output file name"):
    # Convert the input file to the output file, transforming each line
    # with gen_line. NOTE(review): the string annotations look like
    # plac-style CLI help text — confirm against the argument parser used.
    convert_file(gen_line, input_file_name, output_file_name)
Example #8
0
def save_output(data, accuracy, file_name):
    """Write classification results to ``<file_name>.txt``.

    The first line reports the accuracy percentage; each following line is
    one data row. Each row's first element is replaced in place with a
    "Predicted label: ..." string before writing (a side effect on the
    caller's list, preserved from the original implementation).

    Fix: the file is now opened in a ``with`` block so the handle is
    flushed and closed even on error — the original never closed it.
    """
    with open("{}.txt".format(file_name), "w+") as output:
        output.write("Naive Bayes accuracy: {}%\n".format(accuracy * 100))
        for row in data:
            # NOTE: mutates the caller's rows, as the original did.
            row[0] = "Predicted label: {}".format(row[0])
            output.write("{}\n".format(row))


if __name__ == '__main__':

    # Command line: <script> <training-file> <testing-file> <output-file>
    # NOTE(review): Python 2 syntax (print statement below); this script
    # will not run unmodified under Python 3.
    TRAINING_FILE_PATH = sys.argv[1]
    TESTING_FILE_PATH = sys.argv[2]
    OUTPUT_FILE = sys.argv[3]

    # convert the training data
    training_data = convert_file(TRAINING_FILE_PATH)
    # get the relevant data for testing
    label_probs, column_probs, evidence_probs = train_naive_bayes(
        training_data)

    # now you can classify!
    test_data = convert_file(TESTING_FILE_PATH)
    classified_data = classify_data_set(label_probs, column_probs,
                                        evidence_probs, test_data)

    # accuracy is reported as a fraction here and scaled to a percentage
    # for display and for save_output
    accuracy = test_accuracy(classified_data, test_data)
    print "Accuracy: {}%".format(accuracy * 100)

    # save the output
    save_output(classified_data, accuracy, OUTPUT_FILE)
Example #9
0
		
		# build markdown files
		try:
			os.mkdir(build_path)
		except OSError:
			# folder already exists
			pass
		
		# set theme if not user-defined
		theme.check(bin_path, base_path)
		# load theme
		template = theme.load_theme(base_path)
		
		for node in site_tree.iter():
			if node.ext in (".md", ".markdown"):
				utils.convert_file(build_path, node, template)
		
		print("Website built sussessfully.")
	
	if args.sync:
		# load ftp settings
		# ok, it's pretty difficult to read i know :)
		with open(os.path.join(base_path, "settings.txt")) as set_file:
			raw = [line.strip() for line in set_file.readlines()]
			settings = dict([[t.strip() for t in line.split("=")]
								for line in raw if line != ""])
		
		# open connection and upload files...
		with ftplib.FTP(settings["host"], settings["user"], settings["password"]) as ftp:
			for node in site_tree.iter():
				if isinstance(node, tree.Folder) and node.parent != None:
Example #10
0
        iterations += 1

    return centroids, data_points, iterations


if __name__ == '__main__':

    # SYSTEM INPUT VARIABLES
    # Command line: <script> <data-file> <k> <output-file>
    # NOTE(review): Python 2 syntax (print statements below).
    DATA_FILE_PATH = sys.argv[1]
    INPUT_K = int(sys.argv[2])
    OUTPUT_FILE = sys.argv[3]

    print "Running k means on {}!".format(DATA_FILE_PATH)

    # Convert original data into a 2d array
    data = convert_file(DATA_FILE_PATH)

    # Remove the label from each data
    # only for use with the original file
    # (LABEL_LOCATION is defined elsewhere in this module)
    unlabeled_data = [row[:LABEL_LOCATION] for row in data]

    # Converts each string data element into a float, removes any empty rows
    # (under Python 2, filter returns a list here)
    numerized_data = filter(lambda x: len(x) > 0, [[float(i) for i in row]
                                                   for row in unlabeled_data])

    # store sse's for post analyses
    print "\nTESTING ON K = {}".format(INPUT_K)

    # Run k means on the numerized data
    centroids, results, iterations = k_means(INPUT_K, numerized_data)
Example #11
0
import cv2
import numpy as np
import utils as ut
import operator
from matplotlib import pyplot as plt
from functools import reduce
from sklearn.cluster import KMeans
import os

# Estimate leaf disease/contamination from a fixed image via k-means
# color clustering on the masked leaf region.
ut.convert_file()

img = cv2.imread('/home/alvaro/Coffee_Recognize_API/img_hsv/fitossanidade.png')
rect = (188, 224, 382, 687)
clusters = 5

# Cluster the leaf pixels; porcentagens holds per-cluster proportions.
bar, porcentagens = ut.toKmeans(ut.black_back(img, rect), clusters)

# Last cluster's share of the total, as a percentage.
contamination = round(
    (porcentagens[-1] * 100) / reduce(operator.add, porcentagens), 2)
leaf_disease = round(reduce(operator.add, porcentagens), 2)

results = [leaf_disease, contamination]
print(results)