Example #1
    def __init__(self, url=None, path=None, handle=None, metadata_url=None, metadata_path=None, metadata_handle=None, date_parsing=False):
        # http://www.w3.org/TR/2015/WD-tabular-data-model-20150416/#processing-tables
        if url and path:
            raise ValueError("only one argument of url and path allowed")
        elif handle:
            logger.warning('"handle" is used only for testing purposes')
            name = None
        elif url:
            url_resp = urllib2.urlopen(url)
            handle = StringIO(url_resp.read())
            name = url
        elif path:
            handle = open(path, 'rb')
            name = path
        else:
            raise ValueError("url or path argument required")

        if metadata_path and metadata_url:
            raise ValueError("only one argument of metadata_url and metadata_path allowed")
        elif metadata_handle:
            logger.warning('"metadata_handle" is used only for testing purposes')
        elif metadata_url:
            meta_resp = urllib2.urlopen(metadata_url)
            metadata_handle = StringIO(meta_resp.read())
        elif metadata_path:
            metadata_handle = open(metadata_path, 'rb')

        # Retrieve the tabular data file.
        self.table, embedded_metadata = csv_parser.parse(handle, name)

        # TODO create settings using arguments or provided metadata
        sources = metadata_extractor.metadata_extraction(url, metadata_handle, embedded_metadata=embedded_metadata)
        self.metadata = metadata.merge(sources)
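For context: the constructor accepts exactly one of url, path, or handle, and likewise at most one of the metadata arguments. A minimal usage sketch; the enclosing class name CSVW is an assumption here, since the snippet shows only __init__:

# Hypothetical usage; the class name CSVW is assumed, not shown above.
doc = CSVW(path='data.csv', metadata_path='data.csv-metadata.json')
print(doc.metadata)  # merged from embedded and user-supplied metadata sources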
Example #2
def main(data, robot, pool, clean, debug):
    if debug:
        robot = MagicMock()
    else:
        robot = j.clients.zrobot.robots[robot]

    if clean:
        clean_env(robot)

    _, ext = os.path.splitext(data)
    if ext == '.json':
        input_data = j.data.serializer.json.load(data)
    elif ext == '.yaml':
        input_data = j.data.serializer.yaml.load(data)
    elif ext == '.csv':
        input_data = csv_parser.parse(data)
    else:
        raise ValueError(
            "data file extension not supported. Only supported types are json, yaml and csv"
        )

    pool_name = pool
    if 'zeroboot_pool' in input_data:
        pool_name = input_data.pop('zeroboot_pool')

    logger.info("pool name: %s" % pool_name)

    logger.info("start creation of services")

    for template, instances in input_data.items():
        for instance, instance_data in instances.items():
            logger.info("create service %s %s" % (template, instance))
            robot.services.find_or_create(
                "github.com/threefoldtech/0-templates/%s/0.0.1" % template,
                instance,
                data=instance_data)

    hosts = robot.services.find(
        template_name='zeroboot_racktivity_host') + robot.services.find(
            template_name='zeroboot_ipmi_host')
    for service in hosts:
        try:
            service.state.check('actions', 'install', 'ok')
            logger.info("\talready installed")
        except StateCheckError:
            logger.info("\tinstall service")
            service.schedule_action('install').wait(die=True)

    logger.info("create service zeroboot_pool %s" % pool_name)
    pool = robot.services.find_or_create(
        "github.com/threefoldtech/0-templates/zeroboot_pool/0.0.1",
        pool_name,
        data={'zerobootHosts': [h.name for h in hosts]})

    logger.info("installation done")
Example #3
def main():
    matrix = csv_parser.parse(sys.argv[1])
    output_file = sys.argv[2]

    with open(output_file, 'w+') as output:
        csvwriter = csv.writer(output)

        for user in matrix:
            similarity_vector = []
            for other_user in matrix:
                similarity_vector.append(
                    round(
                        n_nearest_neighbors.cosine_similarity(
                            user, other_user), 3))
            csvwriter.writerow(similarity_vector)
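n_nearest_neighbors.cosine_similarity is not shown on this page. A minimal sketch of the standard computation it presumably performs, assuming both arguments are equal-length numeric vectors:

import math

def cosine_similarity(u, v):
    # Dot product divided by the product of the Euclidean norms.
    dot = sum(a * b for a, b in zip(u, v))
    norm_u = math.sqrt(sum(a * a for a in u))
    norm_v = math.sqrt(sum(b * b for b in v))
    if norm_u == 0 or norm_v == 0:
        return 0.0  # convention: an all-zero vector is treated as dissimilar
    return dot / (norm_u * norm_v)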
Example #4
def validate_handle(csv_handle, csv_file_name, schema_handle):
    table, embedded_schema = csv_parser.parse(csv_handle, None)
    schema = simplejson.load(schema_handle)
    tableSchema = None
    if "tables" in schema:
        talbes = schema["tables"]
        for i, current_table in enumerate(talbes):
            if "url" in current_table and current_table["url"] == csv_file_name:
                tableSchema = current_table
                break
    else:
        tableSchema = schema

    if not tableSchema:
        return (False, "Could not find schema for table %s" % csv_file_name)

    valid, error_message = validate_columns_name(embedded_schema, tableSchema)
    if valid:
        return validate_table_data(table, tableSchema)
    else:
        return valid, error_message
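validate_columns_name is not shown either. A plausible minimal sketch of the check, assuming both schemas follow the CSVW layout with a tableSchema object holding a columns list of {"name": ...} entries:

def validate_columns_name(embedded_schema, table_schema):
    # Compare the column names sniffed from the CSV header against the
    # names declared in the table schema, position by position.
    declared = [c.get('name') for c in table_schema.get('tableSchema', {}).get('columns', [])]
    found = [c.get('name') for c in embedded_schema.get('tableSchema', {}).get('columns', [])]
    if declared != found:
        return False, "column names %s do not match schema %s" % (found, declared)
    return True, None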
Example #5
def validate_handle(csv_handle, csv_file_name, schema_handle):
    table, embedded_schema = csv_parser.parse(csv_handle, None)
    schema = simplejson.load(schema_handle)
    tableSchema = None
    if "tables" in schema:
        talbes = schema["tables"]
        for i, current_table in enumerate(talbes):
            if "url" in current_table and current_table["url"] == csv_file_name:
                tableSchema = current_table
                break
    else:
        tableSchema = schema
    
    if not tableSchema:
        return (False, "Could not find schema for table %s: " % csv_file_name)
    
    valid, error_message = validate_columns_name(embedded_schema, tableSchema)
    if valid:
        return validate_table_data(table, tableSchema)
    else:
        return valid, error_message
Example #6
def main():
    records = csv_parser.parse(sys.argv[1])
    recordnumber = 0
    for record in records:
        k = record[0] + ':' + record[2] + ':' + record[3]
        serverchoice = getServer(k)
        key = hashlib.sha1(k.encode('utf-8')).hexdigest()
        url = serverchoice + '/api/v1/entries'
        value = ','.join(map(str, record))
        r = requests.post(url, json={key: value})
        recordnumber += 1
    print('Uploaded all ' + str(recordnumber) + ' entries.')
    print('Verifying the data.')
    for server in servers:
        print('GET ' + server)
        print('{')
        serverurl = server + '/api/v1/entries'
        r = requests.get(serverurl)
        print(json.dumps(json.loads(r.text), indent=3))
        print('}\n')
    print('Finished.')
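getServer and servers come from elsewhere in this script. A minimal sketch, assuming servers is a list of base URLs (the addresses below are hypothetical) and keys are placed by hashing modulo the pool size:

import hashlib

servers = ['http://localhost:5001', 'http://localhost:5002']  # hypothetical addresses

def getServer(key):
    # Map the key to one server by taking its SHA-1 digest modulo the pool size.
    digest = hashlib.sha1(key.encode('utf-8')).hexdigest()
    return servers[int(digest, 16) % len(servers)]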
Example #7
def main():
    # populate ring with server hashes
    ring = populateRingServers()
    # print(ring)
    # creating sorted array to find smallest server hash value that is greater than hashed key
    ringArray = np.array(sorted(ring.values()))
    # print(ringArray)
    records = csv_parser.parse(sys.argv[1])
    recordnumber = 0

    for record in records:
        k = record[0] + ':' + record[2] + ':' + record[3]
        # For each record, find the corresponding server based on its hash. The returned server hash must be the
        # smallest server hash greater than the key hash; if the key hash is greater than all of them, wrap around to the smallest server hash.
        serverHash = matchKeyNode(k, ringArray)
        # after finding this, search the dictionary for corresponding server address to post
        serverAddress = findServer(serverHash, ring)
        keyHash = hashlib.sha1(k.encode('utf-8')).hexdigest()
        url = serverAddress + '/api/v1/entries'
        value = ','.join(map(str, record))
        r = requests.post(url, json={keyHash: value})
        recordnumber += 1
    print('Uploaded all ' + str(recordnumber) + ' entries.')
    print('Verifying the data.')

    for server in servers:
        print('GET ' + server)
        print('{')
        serverurl = server + '/api/v1/entries'
        r = requests.get(serverurl)
        print(json.dumps(json.loads(r.text), indent=3))
        print('}\n')
    print('Finished.')
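matchKeyNode and findServer are not shown. A minimal sketch of the ring lookup the comments describe, assuming ring maps server addresses to integer hash values:

import bisect
import hashlib

def matchKeyNode(key, ringArray):
    # Hash the key into the same space as the server hashes.
    keyHash = int(hashlib.sha1(key.encode('utf-8')).hexdigest(), 16)
    # Position of the smallest server hash strictly greater than the key hash.
    idx = bisect.bisect_right(ringArray, keyHash)
    if idx == len(ringArray):
        idx = 0  # wrap around past the largest hash to the smallest one
    return ringArray[idx]

def findServer(serverHash, ring):
    # Reverse lookup in the ring dict, which maps server address -> hash.
    for address, value in ring.items():
        if value == serverHash:
            return address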
Example #8
    def load(self):
        data = parse(self.file)
        self.headers = data['headers']
        self.rows = data['rows']
        return self
Example #9
def main():
    data_file = p.parse(MY_FILE, ",")
    visualize_companies(data_file)
Example #10
        print("Classifications: " + str(num_rows))
        print("Misclassifications: " + str(misclassifications))
        print("Misclassification rate: " + str(misclass_rate))

    return knn.c_index(labels, predictions)


#######################
#    Actual script    #
#######################

FILENAME_FEATURES = "data/symmetric_pair_input/features.data"
FILENAME_LABELS = "data/symmetric_pair_input/labels.data"
FILENAME_PAIRS = "data/symmetric_pair_input/pairs.data"

features = csv_parser.parse(FILENAME_FEATURES)
labels = csv_parser.parse(FILENAME_LABELS, int)
pairs = csv_parser.parse(FILENAME_PAIRS, str)

# Bind k to the classification function so that cross-validation doesn't need
# to know anything about the prediction function
num_neighbors = 1
f_predict = partial(knn.predict_classification, k=num_neighbors)

# First perform normal leave-one-out cross-validation
c_ix = loo_cv(features, labels, f_predict)
print("c_index for loo_cv and knn with k = " + str(num_neighbors) + " was " +
      str(c_ix) + "\n")

# Then the modified cross-validation that considers the protein pairs
c_ix = loo_cv_with_pairwise_filtering(features, labels, pairs, f_predict)
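loo_cv itself is not shown. A minimal sketch consistent with how it is called here, assuming f_predict takes training features, training labels, and one test instance (k is already bound via partial, and knn is the same module used above):

def loo_cv(features, labels, f_predict):
    # Hold each instance out in turn, train on the rest, predict the held-out one.
    predictions = []
    for i in range(len(features)):
        train_X = features[:i] + features[i + 1:]
        train_y = labels[:i] + labels[i + 1:]
        predictions.append(f_predict(train_X, train_y, features[i]))
    # Score all held-out predictions at once with the same C-index used above.
    return knn.c_index(labels, predictions)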
Example #11
import csv_parser
import regression_analyzer
import plotter
import sys
import math

input_file_path = sys.argv[1]
data = csv_parser.parse(input_file_path)

quadratic_constants = regression_analyzer.quadraticRegression(data)
print(quadratic_constants)
x_values = len(data)
model = [
    quadratic_constants['a'] * x**2 + quadratic_constants['b'] * x +
    quadratic_constants['c'] for x in list(range(0, x_values))
]

plotter.drawGraph(data, model, x_values)

exp_consts = regression_analyzer.exponentialRegression(data)
print(exp_consts)
model = [
    exp_consts['a'] * math.exp(exp_consts['b'] * x)
    for x in list(range(0, x_values))
]
plotter.drawGraph(data, model, x_values)
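regression_analyzer's internals are not shown. A minimal sketch of an exponential fit y = a * exp(b * x) via log-linear least squares, assuming data is a sequence of positive y-values indexed by position:

import math

def exponentialRegression(data):
    # Fit y = a * exp(b * x) by ordinary least squares on ln(y).
    xs = list(range(len(data)))
    ys = [math.log(y) for y in data]
    n = len(xs)
    mean_x = sum(xs) / n
    mean_y = sum(ys) / n
    b = (sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))
         / sum((x - mean_x) ** 2 for x in xs))
    a = math.exp(mean_y - b * mean_x)
    return {'a': a, 'b': b}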
Example #12
def main():
    matrix = csv_parser.parse(sys.argv[1])
    # random_sampling(matrix, *map(int, sys.argv[2:]))
    user_specified(matrix, 1, [(0, 8), (15, 78), (22000, 43)])
Example #13
def main():
    handle_arguments(sys.argv)
    evaluation.user_specified(csv_parser.parse('dataset.csv'),
                              int(sys.argv[1]), parse_input(sys.argv[2]))
Example #14
def main(*args):
    handle_arguments(args)
    evaluation.random_sampling(csv_parser.parse('dataset.csv'),
                               *map(int, sys.argv[1:]))
Example #15
def main():
    data_file = p.parse(MY_FILE, ",")
    visualize_companies(data_file)
Example #16
def main(program_name, file_name, n):
    matrix = csv_parser.parse(file_name)
    start_time = time.time()
    rating = adjusted_weighted_sum(matrix, 0, 0, int(n))
    print(f"rating: {rating}, time: {time.time() - start_time} seconds")
Example #17
    def load(self):
        data = parse(self.file)
        self.headers = data['headers']
        self.rows = data['rows']
        return self
Example #18
    # Welcome Title Program Screen Text
    print("====================================")
    print(" GROUP 2 - REGRESSION SOFTWARE")
    print(" CS431-01")
    print(" Fall 2017")
    print("====================================")
    print()
    """
    LOOP 1 - Process available command line arguments
    """
    inputFilePath = sys.argv[1]
    """
    Plot and draw the raw input data values onto figure window
    """
    rawDataValues = parser.parse(inputFilePath)
    plotter.drawGraph(rawDataValues, 'scatter')

    # What regression graph to display?
    if sys.argv[2] == 'linear':
        linearRegressionDataValues = reg.linearRegression(
            parser.parse(inputFilePath))
        plotter.drawGraph(linearRegressionDataValues, 'line')
    elif sys.argv[2] == 'exponential':
        exponentialRegressionDataValues = reg.exponentialRegression(
            parser.parse(inputFilePath))
        plotter.drawGraph(exponentialRegressionDataValues, 'line')
    else:
        quadraticRegressionDataValues = reg.quadraticRegression(
            parser.parse(inputFilePath))
        plotter.drawGraph(quadraticRegressionDataValues, 'line')
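Invocation sketch (script and file names are hypothetical): python regression.py input.csv linear. Any second argument other than linear or exponential falls through to the quadratic fit.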