def main():
    """Entry point: propositionalize the input data with HinMine and store the features.

    Reads the job inputs, assembles the independent variables into a matrix,
    optionally normalizes the columns, builds an adjacency graph, runs the
    HinMine propositionalization, and saves the resulting feature table.
    """
    # Set up logging before anything else
    logging.basicConfig(level=logging.INFO)
    logging.info(cf_netSDM)

    # Fetch the job inputs (data + parameters)
    inputs = io_helper.fetch_data()
    data = inputs['data']
    normalize = get_param(inputs['parameters'], 'normalize', bool, 'True')
    damping = get_param(inputs['parameters'], 'damping', float, '0.85')

    # Assemble the independent variables into a (samples x variables) matrix,
    # one column per variable.
    n_rows = len(data['independent'][0]['series'])
    n_cols = len(data['independent'])
    data_array = np.zeros((n_rows, n_cols))
    for col_idx, variable in enumerate(data['independent']):
        for row_idx, value in enumerate(variable['series']):
            data_array[row_idx, col_idx] = value

    # Optionally scale every column to unit Euclidean norm
    if normalize:
        for col_idx in range(data_array.shape[1]):
            column = data_array[:, col_idx]
            data_array[:, col_idx] = column / np.linalg.norm(column)

    # Build the adjacency graph and propositionalize it via HinMine
    network = construct_adjacency_graph(range(data_array.shape[0]), data_array, data['dependent'][0]['series'])
    propositionalized = cf_netSDM.hinmine_propositionalize(network, damping)['train_features']['data']

    # Pack the propositionalized features as a tabular data resource
    results_dict = {
        'profile': 'tabular-data-resource',
        'name': 'hinmine-features',
        'data': [],
        'schema': {
            'fields': [],
            'primaryKey': 'id'
        }
    }
    # NOTE(review): rows and columns both iterate over shape[0] — the
    # propositionalized matrix is presumably square; confirm with cf_netSDM.
    n = propositionalized.shape[0]
    for row_idx in range(n):
        instance = {"id": row_idx}
        for col_idx in range(n):
            instance["feature_%i" % (col_idx + 1)] = propositionalized[row_idx, col_idx]
        results_dict['data'].append(instance)
    for col_idx in range(n):
        results_dict['schema']['fields'].append({'name': 'feature_%i' % (col_idx + 1), 'type': 'float'})

    io_helper.save_results(json.dumps(results_dict), '', 'text/plain')
def main():
    """Entry point: fit a linear regression on the job inputs and save the output."""
    # Logging setup
    logging.basicConfig(level=logging.INFO)

    # Fetch inputs from the middleware
    inputs = io_helper.fetch_data()
    dep_var = inputs["data"]["dependent"][0]
    inped_vars = inputs["data"]["independent"]

    # Linear regression requires a continuous dependent variable; bail out otherwise
    if dep_var["type"]["name"] not in ["integer", "real"]:
        logging.warning("Dependent variable should be continuous !")
        return None

    # Reshape raw inputs into the layout the computation expects
    data = format_data(inputs["data"])

    # Fit the model, then convert the result to the chart output format
    regression = compute_linear_regression(dep_var, inped_vars, data)
    linear_regression_results = format_output(regression)

    # Persist results for downstream consumers
    io_helper.save_results(linear_regression_results, '', 'application/highcharts+json')
# Example #3
def main():
    """Entry point: run an ANOVA on the job inputs and save the output."""
    # Logging setup
    logging.basicConfig(level=logging.INFO)

    # Fetch inputs from the middleware
    inputs = io_helper.fetch_data()
    dep_var = inputs["data"]["dependent"][0]
    inped_vars = inputs["data"]["independent"]
    design = get_parameter(inputs["parameters"], DESIGN_PARAM)

    # ANOVA requires a continuous dependent variable; bail out otherwise
    if dep_var["type"]["name"] not in ["integer", "real"]:
        logging.warning("Dependent variable should be continuous !")
        return None

    # Reshape raw inputs into the layout the computation expects
    data = format_data(inputs["data"])

    # Run the ANOVA, then convert the result to the chart output format
    anova = compute_anova(dep_var, inped_vars, data, design)
    anova_results = format_output(anova.to_dict())

    # Persist results for downstream consumers
    io_helper.save_results(anova_results, '', 'application/highcharts+json')
# Example #4
def main():
    """Entry point: compute histograms for the job's variables and store them.

    A missing (or empty) "dependent" section and a missing "independent"
    section are tolerated: a warning is logged and an empty list is used.
    """
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Read inputs
    inputs = io_helper.fetch_data()
    try:
        dep_var = inputs["data"]["dependent"][0]
    except (KeyError, IndexError):
        # BUG FIX: an empty "dependent" list raises IndexError, which the
        # original KeyError-only handler let escape and crash the job.
        logging.warning("Cannot find dependent variables data")
        dep_var = []
    try:
        indep_vars = inputs["data"]["independent"]
    except KeyError:
        logging.warning("Cannot find independent variables data")
        indep_vars = []
    nb_bins = get_bins_param(inputs["parameters"], BINS_PARAM)

    # Compute histograms (JSON formatted for HighCharts)
    histograms_results = compute_histograms(dep_var, indep_vars, nb_bins)

    # Store results
    io_helper.save_results(histograms_results, '',
                           'application/highcharts+json')
import tempfile
import logging
from subprocess import call
from io_helper import io_helper

import preprocess

DEFAULT_DOCKER_IMAGE = 'python-hedwig'

if __name__ == '__main__':
    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Read inputs
    inputs = io_helper.fetch_data()
    data = inputs["data"]

    out_file = 'input.csv'
    rules_out_file = 'rules.txt'

    matrix, attributes = preprocess.to_matrix(data)
    preprocess.dump_to_csv(matrix, attributes, out_file)

    # Call hedwig with sensible defaults
    examples_file = out_file

    empty_bk = tempfile.mkdtemp()
    call([
        'python', '-m', 'hedwig', empty_bk, examples_file, '-f', 'csv', '-l',
        '-o', rules_out_file, '--nocache'
def main():
    """Entry point: run t-SNE on the independent variables and save a scatter chart.

    Reads the inputs, converts the independent variables to a float32 matrix,
    optionally z-scores it, writes it to a temporary file, runs ``a_tsne`` and
    saves the resulting HighCharts scatter chart.
    """
    logging.basicConfig(level=logging.INFO)

    inputs = io_helper.fetch_data()

    # Dependent variable for tsne this might be the labels - this is optional
    labels = None
    dependent = inputs["data"].get("dependent", [])
    indep_vars = inputs["data"]["independent"]  # For tsne the data dimensions

    # t-SNE only works on continuous data
    if not data_types_in_allowed(indep_vars, ["integer", "real"]):
        logging.warning("Independent variables should be continuous !")
        return None

    data = format_independent_data(inputs["data"])
    df = pd.DataFrame.from_dict(data)
    source_dimensions = df.shape[1]  # number of columns
    num_points = df.shape[0]  # number of samples/points

    convdf = df.apply(lambda x: pd.to_numeric(x))
    # Write the data to a temporary file (delete=False: the path is handed to a_tsne)
    f = tempfile.NamedTemporaryFile(delete=False)
    # Renamed from `input` to avoid shadowing the builtin.
    input_data = convdf.values.astype(np.float32)
    logging.debug('input {}'.format(input_data))

    # Parameter defaults; all optional and may be overridden below
    perplexity = 30
    theta = 0.5
    target_dimensions = 2
    iterations = 1000
    do_zscore = True
    dependent_is_label = True

    try:
        perplexity = get_parameter(inputs['parameters'], 'perplexity', perplexity)
        theta = get_parameter(inputs['parameters'], 'theta', theta)
        target_dimensions = get_parameter(inputs['parameters'], 'target_dimensions', target_dimensions)
        iterations = get_parameter(inputs['parameters'], 'iterations', iterations)
        # Boolean parameters arrive as the strings 'True'/'False'
        do_zscore_str = get_parameter(inputs['parameters'], 'do_zscore', str(do_zscore))
        if do_zscore_str == 'True':
            do_zscore = True
        elif do_zscore_str == 'False':
            do_zscore = False
        else:
            raise ValueError
        dependent_is_label_str = get_parameter(inputs['parameters'], 'dependent_is_label', str(dependent_is_label))
        if dependent_is_label_str == 'True':
            dependent_is_label = True
        elif dependent_is_label_str == 'False':
            dependent_is_label = False
        else:
            raise ValueError
    except ValueError as e:
        # BUG FIX: the exception was previously passed as a lazy %-arg with no
        # placeholder in the message, so it was never rendered.
        logging.error("Could not convert supplied parameter to value, error: %s", e)
        raise
    except Exception:
        # BUG FIX: was `sys.exec_info()`, which does not exist (the function is
        # sys.exc_info()), so the handler itself raised AttributeError. Also
        # narrowed the bare `except:` so SystemExit/KeyboardInterrupt pass through.
        logging.error(" Unexpected error: %s", sys.exc_info()[0])
        raise

    # Compute results
    if do_zscore:
        input_data = scipy.stats.zscore(input_data)

    if len(dependent) > 0 and dependent_is_label:
        dep_var = dependent[0]
        labels = dep_var["series"]

    inputFilePath = f.name
    input_data.tofile(inputFilePath)
    f.close()

    # Second temporary file: a_tsne writes its output here
    f = tempfile.NamedTemporaryFile(delete=False)
    outputFilePath = f.name
    f.close()
    output = a_tsne(inputFilePath, outputFilePath, num_points,
                    source_dimensions, target_dimensions, perplexity,
                    theta, iterations)

    logging.debug('output shape {}'.format(output.shape))
    logging.debug('output {}'.format(output))
    chart = generate_scatterchart(output, indep_vars, labels, perplexity, theta, iterations)

    error = ''
    shape = 'application/highcharts+json'

    logging.debug("Highchart: %s", chart)
    io_helper.save_results(chart, error, shape)
    logging.info("Highchart output saved to database.")