def __init__(self, url=None, path=None, handle=None, metadata_url=None,
             metadata_path=None, metadata_handle=None, date_parsing=False):
    # http://www.w3.org/TR/2015/WD-tabular-data-model-20150416/#processing-tables
    if url and path:
        raise ValueError("only one argument of url and path allowed")
    elif handle:
        logger.warning('"handle" is used only for testing purposes')
        name = None
    elif url:
        url_resp = urllib2.urlopen(url)
        handle = StringIO(url_resp.read())
        name = url
    elif path:
        handle = open(path, 'rb')
        name = path
    else:
        raise ValueError("url or path argument required")

    if metadata_path and metadata_url:
        raise ValueError("only one argument of metadata_url and metadata_path allowed")
    elif metadata_handle:
        logger.warning('"metadata_handle" is used only for testing purposes')
    elif metadata_url:
        meta_resp = urllib2.urlopen(metadata_url)
        metadata_handle = StringIO(meta_resp.read())
    elif metadata_path:
        metadata_handle = open(metadata_path, 'rb')

    # Retrieve the tabular data file.
    self.table, embedded_metadata = csv_parser.parse(handle, url)
    # TODO create settings using arguments or provided metadata
    sources = metadata_extractor.metadata_extraction(
        url, metadata_handle, embedded_metadata=embedded_metadata)
    self.metadata = metadata.merge(sources)
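# Note: the constructor above is Python 2 code (the url-and-path conflict
# check must run before the single-argument branches, or it is unreachable).
# For reference, it assumes these stdlib imports; csv_parser,
# metadata_extractor and metadata are project modules not shown here:
#   import urllib2
#   from StringIO import StringIO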
def main(data, robot, pool, clean, debug):
    if debug:
        robot = MagicMock()
    else:
        robot = j.clients.zrobot.robots[robot]

    if clean:
        clean_env(robot)

    _, ext = os.path.splitext(data)
    if ext == '.json':
        input_data = j.data.serializer.json.load(data)
    elif ext == '.yaml':
        input_data = j.data.serializer.yaml.load(data)
    elif ext == '.csv':
        input_data = csv_parser.parse(data)
    else:
        raise ValueError(
            "data file extension not supported. Only supported types are json, yaml and csv")

    pool_name = pool
    if 'zeroboot_pool' in input_data:
        pool_name = input_data.pop('zeroboot_pool')
    logger.info("pool name: %s" % pool_name)

    logger.info("start creation of services")
    for template, instances in input_data.items():
        for instance, instance_data in instances.items():
            logger.info("create service %s %s" % (template, instance))
            robot.services.find_or_create(
                "github.com/threefoldtech/0-templates/%s/0.0.1" % template,
                instance, data=instance_data)

    hosts = robot.services.find(template_name='zeroboot_racktivity_host') + \
        robot.services.find(template_name='zeroboot_ipmi_host')
    for service in hosts:
        try:
            service.state.check('actions', 'install', 'ok')
            logger.info("\talready installed")
        except StateCheckError:
            logger.info("\tinstall service")
            service.schedule_action('install').wait(die=True)

    logger.info("create service zeroboot_pool %s" % pool_name)
    robot.services.find_or_create(
        "github.com/threefoldtech/0-templates/zeroboot_pool/0.0.1",
        pool_name, data={'zerobootHosts': [h.name for h in hosts]})
    logger.info("installation done")
def main():
    matrix = csv_parser.parse(sys.argv[1])
    output_file = sys.argv[2]
    with open(output_file, 'w+') as output:
        csvwriter = csv.writer(output)
        # Write one row per user: its cosine similarity to every other user.
        for user in matrix:
            similarity_vector = []
            for other_user in matrix:
                similarity_vector.append(
                    round(n_nearest_neighbors.cosine_similarity(user, other_user), 3))
            csvwriter.writerow(similarity_vector)
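# For reference, a minimal sketch of the cosine similarity used above,
# assuming each user row is a plain sequence of numeric ratings (the real
# n_nearest_neighbors.cosine_similarity may handle missing ratings
# differently):
import math

def cosine_similarity(u, v):
    # Standard formula: dot(u, v) / (|u| * |v|).
    dot = sum(a * b for a, b in zip(u, v))
    norm_u = math.sqrt(sum(a * a for a in u))
    norm_v = math.sqrt(sum(b * b for b in v))
    if norm_u == 0 or norm_v == 0:
        return 0.0  # convention for all-zero vectors
    return dot / (norm_u * norm_v)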
def validate_handle(csv_handle, csv_file_name, schema_handle):
    table, embedded_schema = csv_parser.parse(csv_handle, None)
    schema = simplejson.load(schema_handle)
    tableSchema = None
    if "tables" in schema:
        tables = schema["tables"]
        for current_table in tables:
            if "url" in current_table and current_table["url"] == csv_file_name:
                tableSchema = current_table
                break
    else:
        tableSchema = schema
    if not tableSchema:
        return (False, "Could not find schema for table %s" % csv_file_name)
    valid, error_message = validate_columns_name(embedded_schema, tableSchema)
    if valid:
        return validate_table_data(table, tableSchema)
    return valid, error_message
def main():
    records = csv_parser.parse(sys.argv[1])
    recordnumber = 0
    for record in records:
        k = record[0] + ':' + record[2] + ':' + record[3]
        serverchoice = getServer(k)
        key = hashlib.sha1(k.encode('utf-8')).hexdigest()
        url = serverchoice + '/api/v1/entries'
        value = ','.join(map(str, record))
        r = requests.post(url, json={key: value})
        recordnumber += 1
    print('Uploaded all ' + str(recordnumber) + ' entries.')

    print('Verifying the data.')
    for server in servers:
        print('GET ' + server)
        print('{')
        serverurl = server + '/api/v1/entries'
        r = requests.get(serverurl)
        print(json.dumps(json.loads(r.text), indent=3))
        print('}\n')
    print('Finished.')
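# A possible shape for the getServer() helper above, assuming the global
# "servers" list from the verification loop and simple modulo placement on
# the SHA-1 of the key (the real helper is not shown, so this is a sketch):
def getServer(key):
    digest = hashlib.sha1(key.encode('utf-8')).hexdigest()
    return servers[int(digest, 16) % len(servers)]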
def main():
    # Populate the ring with server hashes.
    ring = populateRingServers()

    # Build a sorted array so we can find the smallest server hash that is
    # greater than a hashed key.
    ringArray = []
    for key, value in ring.items():
        ringArray.append(value)
    ringArray.sort()
    ringArray = np.array(ringArray)

    records = csv_parser.parse(sys.argv[1])
    recordnumber = 0
    for record in records:
        k = record[0] + ':' + record[2] + ':' + record[3]
        # For each record, find the owning server based on its hash: the
        # returned server hash must be the smallest server hash greater than
        # the key hash; if the key hash is greater than all server hashes,
        # wrap around to the smallest one.
        serverHash = matchKeyNode(k, ringArray)
        # Look up the server address for that hash in the ring dictionary.
        serverAddress = findServer(serverHash, ring)
        keyHash = hashlib.sha1(k.encode('utf-8')).hexdigest()
        url = serverAddress + '/api/v1/entries'
        value = ','.join(map(str, record))
        r = requests.post(url, json={keyHash: value})
        recordnumber += 1
    print('Uploaded all ' + str(recordnumber) + ' entries.')

    print('Verifying the data.')
    for server in servers:
        print('GET ' + server)
        print('{')
        serverurl = server + '/api/v1/entries'
        r = requests.get(serverurl)
        print(json.dumps(json.loads(r.text), indent=3))
        print('}\n')
    print('Finished.')
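# A minimal sketch of the matchKeyNode() lookup described in the comments
# above, assuming ring positions are integers derived from SHA-1 digests
# (the real helper is not shown). np.searchsorted finds the index of the
# smallest ring hash strictly greater than the key hash; running off the
# end wraps around to the first node.
def matchKeyNode(key, ringArray):
    keyHash = int(hashlib.sha1(key.encode('utf-8')).hexdigest(), 16)
    index = np.searchsorted(ringArray, keyHash, side='right')
    if index == len(ringArray):
        index = 0  # wrap around the ring
    return ringArray[index]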
def load(self):
    data = parse(self.file)
    self.headers = data['headers']
    self.rows = data['rows']
    return self
def main():
    data_file = p.parse(MY_FILE, ",")
    visualize_companies(data_file)
print("Classifications: " + str(num_rows)) print("Misclassifications: " + str(misclassifications)) print("Misclassification rate: " + str(misclass_rate)) return (knn.c_index(labels, predictions)) ####################### # Actual script # ####################### FILENAME_FEATURES = "data/symmetric_pair_input/features.data" FILENAME_LABELS = "data/symmetric_pair_input/labels.data" FILENAME_PAIRS = "data/symmetric_pair_input/pairs.data" features = csv_parser.parse(FILENAME_FEATURES) labels = csv_parser.parse(FILENAME_LABELS, int) pairs = csv_parser.parse(FILENAME_PAIRS, str) #Bind k to the classification function so that cross-validation doesn't need #to know anything about the prediction function num_neighbors = 1 f_predict = partial(knn.predict_classification, k=num_neighbors) #First perform normal leave-one-out cross-validation c_ix = loo_cv(features, labels, f_predict) print("c_index for loo_cv and knn with k = " + str(num_neighbors) + " was " + str(c_ix) + "\n") #Then the modified cross-validation that considers the protein pairs c_ix = loo_cv_with_pairwise_filtering(features, labels, pairs, f_predict)
import csv_parser
import regression_analyzer
import plotter
import sys
import math

input_file_path = sys.argv[1]
data = csv_parser.parse(input_file_path)

# Fit and plot a quadratic model y = a*x^2 + b*x + c.
quadratic_constants = regression_analyzer.quadraticRegression(data)
print(quadratic_constants)

x_values = len(data)
model = [
    quadratic_constants['a'] * x ** 2 + quadratic_constants['b'] * x +
    quadratic_constants['c'] for x in range(x_values)
]
plotter.drawGraph(data, model, x_values)

# Fit and plot an exponential model y = a * e^(b*x).
exp_consts = regression_analyzer.exponentialRegression(data)
print(exp_consts)
model = [
    exp_consts['a'] * math.exp(exp_consts['b'] * x) for x in range(x_values)
]
plotter.drawGraph(data, model, x_values)
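# A possible implementation of regression_analyzer.quadraticRegression,
# assuming data is a sequence of y-values sampled at x = 0, 1, 2, ... to
# match the model built above (the real module is not shown).
# numpy.polyfit returns coefficients from the highest degree down.
import numpy as np

def quadraticRegression(data):
    x = np.arange(len(data))
    a, b, c = np.polyfit(x, data, 2)
    return {'a': a, 'b': b, 'c': c}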
def main():
    matrix = csv_parser.parse(sys.argv[1])
    # random_sampling(matrix, *map(int, sys.argv[2:]))
    user_specified(matrix, 1, [(0, 8), (15, 78), (22000, 43)])
def main():
    handle_arguments(sys.argv)
    evaluation.user_specified(csv_parser.parse('dataset.csv'),
                              int(sys.argv[1]), parse_input(sys.argv[2]))
def main(*args):
    handle_arguments(args)
    evaluation.random_sampling(csv_parser.parse('dataset.csv'),
                               *map(int, sys.argv[1:]))
def main(program_name, file_name, n):
    matrix = csv_parser.parse(file_name)
    start_time = time.time()
    rating = adjusted_weighted_sum(matrix, 0, 0, int(n))
    print(f"rating: {rating}, time: {time.time() - start_time} seconds")
# Welcome title screen text
print("====================================")
print("   GROUP 2 - REGRESSION SOFTWARE")
print("            CS431-01")
print("            Fall 2017")
print("====================================")
print()

# Process the command line arguments.
inputFilePath = sys.argv[1]

# Plot and draw the raw input data values onto the figure window.
rawDataValues = parser.parse(inputFilePath)
plotter.drawGraph(rawDataValues, 'scatter')

# Which regression graph to display? Reuse the already parsed data rather
# than re-reading the input file for each fit.
if sys.argv[2] == 'linear':
    linearRegressionDataValues = reg.linearRegression(rawDataValues)
    plotter.drawGraph(linearRegressionDataValues, 'line')
elif sys.argv[2] == 'exponential':
    exponentialRegressionDataValues = reg.exponentialRegression(rawDataValues)
    plotter.drawGraph(exponentialRegressionDataValues, 'line')
else:
    quadraticRegressionDataValues = reg.quadraticRegression(rawDataValues)
    plotter.drawGraph(quadraticRegressionDataValues, 'line')
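# Example invocation (script and file names hypothetical):
#   python regression.py input_data.csv linear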