Example #1
def store_lines(kw):
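    # Parses a byte range of a CSV (byte_start/byte_end) in a worker process
    # and returns the parsed rows as a list.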
    process_name = multiprocessing.current_process().name
    db_index = kw.get('job_index', 0)
    pipe = writer_function

    options = dict(
        max_count=kw.get('max_count', 100000),
        iter_line=caller_function,
        as_set=True,
        pipe=pipe,
        row_index=kw.get('row_index', 1),
        keep_sample=kw.get('sample', False),
        byte_start=kw.get('byte_start', None),
        byte_end=kw.get('byte_end', None),
    )

    try:
        d, sample = parse_csv(**options)
        print('Finished relations. Count: {}. Writing file'.format(len(d)))
    except Exception as e:
        print('Error on "{}":'.format(process_name), e)
        print(traceback.format_exc())
        d = []

    # pipe.execute()

    return list(d)
Example #2
def summarize(filename):
    orig, parsed = list(parse.parse_csv(filename))
    hydro = list(row.mw_available.hydro for row in parsed)
    needed = list(row.mw_available.total for row in parsed)
    # amount of additional power needed to generate
    shortfall = list(n - h for n, h in zip(needed, hydro))
    print_summary_stats('needed power', needed)
    print_summary_stats('hydro', hydro)
    print_summary_stats('hydro shortfall', shortfall)
Example #3
def main():
    users  = parse_csv("anonwhipdata.csv")
    jitter = Jitter(users)

    jitter.plot_nom_rate("type")
    jitter.plot_nom_rate("insurance_group", insurance_group)
    jitter.plot_nom_rate("doors")
    jitter.plot_nom_rate("seats")
    jitter.plot_nom_rate("fuel", lambda value: value == "heavy oil" and "diesel" or value)
    jitter.plot_nom_rate("transmission", lambda value:  value and "automatic" in value and "automatic" or value or "None")
    jitter.plot_nom_rate("engine_cc", int_slop(slop=500, top=10000))
    jitter.plot_nom_rate("engine_co2", int_slop(slop=30))
    jitter.plot_nom_rate("has_photos")
    jitter.plot_nom_rate("reg_date", date_to_age)
    jitter.plot_nom_rate("join_date", date_to_age_small)
    jitter.plot_nom_rate("engine_cc", int_slop(slop=200, top=10000), filters=[Filter("car_type", ["car.hatchback"])])
    jitter.plot_nom_rate("engine_cc", int_slop(slop=200, top=10000), filters=[Filter("insurance_group", [10, 3])])
Example #4
def main():
    (init, hours) = parse.parse_csv(open(sys.argv[1], 'r'))
    writer = csv.writer(open(sys.argv[2], "w"))

    season = config.get_season(hours[0])
    nuclear = init[-1].mw_drawn.nuclear

    def target_nuclear(inrow):
        # This is a control system to decide how much nuclear power we want.

        # Using the provided numbers, hydro is optimal.
        # But nuclear is second-best, and nuclear power supply is inelastic.
        # So we want to use lots of nuclear, but only after we use as much
        # hydro as possible.

        # If we're using 80% as much power this week, adjust estimates down.
        adjust_factor = max(1, inrow.mw_available.total / inrow.historical_drawn[0])
        predicted_drawn = [drawn * adjust_factor for drawn in inrow.historical_drawn]
        avg_draw = sum(predicted_drawn) / len(predicted_drawn)

        # Hydro appears to be pretty stable.
        predicted_needed = max(0, avg_draw - inrow.mw_available.hydro)

        print('Aiming for {} nuclear power'.format(predicted_needed))
        return predicted_needed

    value_func = {'cost': -1, 'co2': -2, 'green': 0}
    if len(sys.argv) > 3:
        value_func = {
            'cost': float(sys.argv[3]),
            'co2': float(sys.argv[4]),
            'green': float(sys.argv[5]),
        }

    outrows = []
    for hour in hours:
        rate = config.consumer_rate(season, hour.time)
        nuclear = clamp(target_nuclear(hour), nuclear * 0.99, nuclear * 1.01)
        power_row, sold = optimizer.optimize(hour, nuclear, value_func)
        outrow = gen_outrow(hour, power_row, sold, rate)
        writer.writerow(outrow.to_row())
        outrows.append(outrow)

    print_summary(outrows)
Example #5
def main():
    args = parser.parse_args()

    json_data = csv_parser.parse_csv(
        csv_file=args.csv_file,
        year=args.year,
        schema_file=args.schema_file,
        schema_name=args.schema_name,
        schema_version=args.schema_version,
        csv_schema_mapping=args.csv_schema_mapping,
        company_registry=args.company_registry)

    py_data = json.loads(json_data)

    result = issue_credentials(environment=args.environment,
                               url=args.url,
                               issuer_key=args.issuer_key,
                               data=py_data)

    print(json.dumps(json.loads(result), indent=4))
Example #6
def main():
    users = parse_csv("anonwhipdata.csv")
    jitter = Jitter(users)

    jitter.plot_nom_rate("type")
    jitter.plot_nom_rate("insurance_group", insurance_group)
    jitter.plot_nom_rate("doors")
    jitter.plot_nom_rate("seats")
    jitter.plot_nom_rate(
        "fuel", lambda value: value == "heavy oil" and "diesel" or value)
    jitter.plot_nom_rate(
        "transmission", lambda value: value and "automatic" in value and
        "automatic" or value or "None")
    jitter.plot_nom_rate("engine_cc", int_slop(slop=500, top=10000))
    jitter.plot_nom_rate("engine_co2", int_slop(slop=30))
    jitter.plot_nom_rate("has_photos")
    jitter.plot_nom_rate("reg_date", date_to_age)
    jitter.plot_nom_rate("join_date", date_to_age_small)
    jitter.plot_nom_rate("engine_cc",
                         int_slop(slop=200, top=10000),
                         filters=[Filter("car_type", ["car.hatchback"])])
    jitter.plot_nom_rate("engine_cc",
                         int_slop(slop=200, top=10000),
                         filters=[Filter("insurance_group", [10, 3])])
Example #7
        yvals.append(yval)
        print "x: " + str(xval) + ", y: " + str(yval)
    return xvals, yvals

#takes x,y values, plots them
# also x,y -> angles -> xy & plots 
def plotIKcircle(temp): 
    xvals, yvals = temp
    plt.plot(xvals, yvals, 'rd')  # red diamonds
    thetavals = [get_angles(tempx, tempy) for tempx, tempy in zip(xvals, yvals)]
    for i in range(len(thetavals)):
        print thetavals[i]
    IKvals = [angles2xy(theta1, theta2) for theta1, theta2 in thetavals] 
    tempx2, tempy2 = zip(*IKvals)
    plt.axis('equal')
    plt.plot(tempx2, tempy2, 'b.')
    #print "x: " + str(tempx2) + ", y: " + str(tempy2)

def plot_theta_vals(thetavals):
    IKvals = [angles2xy(theta1, theta2) for theta1, theta2 in thetavals] 
    tempx2, tempy2 = zip(*IKvals)
    plt.axis('equal')
    plt.plot(tempx2, tempy2, 'ro')

thetavals = parse.parse_csv()
plot_theta_vals(thetavals)

plotworkingenvelope()
#plotIKcircle(circle(-100,0))
plt.show()
Example #8
# Imported modules
import parse as p

f = input("Please input a file. > ")
cell = int(
    input(
        "Which column contains the data you would like counted? e.g.[1,2,3...] > "
    )) - 1

parsed = p.parse_csv(f)
data = p.pull(parsed, cell)
print(p.items(data))
Example #9
    configs = {
        fn.strip("/."): read_config(fn)
        for fn in folder_names if read_config(fn)
    }

    # Get the build context
    API_KEY = os.environ.get('GM_API_KEY')
    API_KEY_DEV = os.environ.get('GM_API_KEY_DEV')
    context = {'GM_API_KEY': API_KEY, "map_configs": configs}
    print("BUILDING WITH CONTEXT")
    print(json.dumps(context, indent=4))

    # Build the home index.html
    env.get_template('home.html').stream(context).dump("index.html")

    # Build the admin index.html
    env.get_template('admin.html').stream(context).dump("draw/index.html")

    # Build each of the map folders (in order of folder name alphabetically)
    for dir_name, config in configs.items():
        # Make the data.json file
        if len(sys.argv) <= 1:
            parse_csv(f"{dir_name}/data.csv", config, API_KEY_DEV)

        # Get the template context
        context.update({"config": config})

        # Render the template to an index.html
        env.get_template('map.html').stream(context).dump(
            f"{dir_name}/index.html")
Example #10
from parse import parse_csv
from predict import RegDatePredictor, InsuranceGroupPredictor, RegDateInsPredictor, AllPredictor, TypePredictor, EngineCCPredictor, EngineCO2Predictor, EngineBothPredictor
import math

users      = parse_csv("anonwhipdata.csv")
predictors = [p(users) for p in [RegDatePredictor, InsuranceGroupPredictor, RegDateInsPredictor, AllPredictor, TypePredictor, EngineCCPredictor, EngineCO2Predictor, EngineBothPredictor]]
sqdiffs    = [0] * len(predictors)
matches    = [0] * len(predictors)
sqcounts   = [0] * len(predictors)

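# Accumulate squared prediction error, match counts, and sample counts for
# each predictor over every user with a known monthly_rate.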
for user in users:
    if user.monthly_rate:
        for i, predictor in enumerate(predictors):
            result      = predictor.predict(user)
            if result:
                difference  = user.monthly_rate - result.price
                sqdiffs[i]  += difference ** 2
                sqcounts[i] += 1
                matches[i]  += result.matches

for i, predictor in enumerate(predictors):
    print "%- 25s %0.2f % 5s" % (predictor.__class__.__name__, math.sqrt(sqdiffs[i] / sqcounts[i]), matches[i] / sqcounts[i])
Example #11
from pptx import Presentation
from pptx.util import Inches
from parse import parse_csv, bonus_csv
from slides import fill_slide, bonus_card

presentation = Presentation()
presentation.slide_width = Inches(2.5)
presentation.slide_height = Inches(3.5)
cards = parse_csv('cards.csv')
bonus = bonus_csv('bonus.csv')


def add_slide_from_card(c):
    layout = presentation.slide_layouts[6]
    slide = presentation.slides.add_slide(layout)
    fill_slide(slide, c)


def add_bonus_card(c):
    layout = presentation.slide_layouts[6]
    slide = presentation.slides.add_slide(layout)
    bonus_card(slide, c)


for card in cards:
    add_slide_from_card(card)

for card in bonus:
    add_bonus_card(card)

presentation.save('cards.pptx')
Example #12
#takes x,y values, plots them
# also x,y -> angles -> xy & plots
def plotIKcircle(temp):
    xvals, yvals = temp
    plt.plot(xvals, yvals, 'rd')  # red diamonds
    thetavals = [
        get_angles(tempx, tempy) for tempx, tempy in zip(xvals, yvals)
    ]
    for i in range(len(thetavals)):
        print thetavals[i]
    IKvals = [angles2xy(theta1, theta2) for theta1, theta2 in thetavals]
    tempx2, tempy2 = zip(*IKvals)
    plt.axis('equal')
    plt.plot(tempx2, tempy2, 'b.')
    #print "x: " + str(tempx2) + ", y: " + str(tempy2)


def plot_theta_vals(thetavals):
    IKvals = [angles2xy(theta1, theta2) for theta1, theta2 in thetavals]
    tempx2, tempy2 = zip(*IKvals)
    plt.axis('equal')
    plt.plot(tempx2, tempy2, 'ro')


thetavals = parse.parse_csv()
plot_theta_vals(thetavals)

plotworkingenvelope()
#plotIKcircle(circle(-100,0))
plt.show()
Example #13
from parse import parse_csv
from predict import RegDatePredictor, InsuranceGroupPredictor, RegDateInsPredictor, AllPredictor, TypePredictor, EngineCCPredictor, EngineCO2Predictor, EngineBothPredictor
import math

users = parse_csv("anonwhipdata.csv")
predictors = [
    p(users) for p in [
        RegDatePredictor, InsuranceGroupPredictor, RegDateInsPredictor,
        AllPredictor, TypePredictor, EngineCCPredictor, EngineCO2Predictor,
        EngineBothPredictor
    ]
]
sqdiffs = [0] * len(predictors)
matches = [0] * len(predictors)
sqcounts = [0] * len(predictors)

for user in users:
    if user.monthly_rate:
        for i, predictor in enumerate(predictors):
            result = predictor.predict(user)
            if result:
                difference = user.monthly_rate - result.price
                sqdiffs[i] += difference**2
                sqcounts[i] += 1
                matches[i] += result.matches

for i, predictor in enumerate(predictors):
    print "%- 25s %0.2f % 5s" % (predictor.__class__.__name__,
                                 math.sqrt(sqdiffs[i] / sqcounts[i]),
                                 matches[i] / sqcounts[i])
Example #14
def train_and_test(data_dir, baseline='neg'):
    pos_samples = parse_csv('{}/pos.csv'.format(data_dir))
    neg_samples = parse_csv('{}/neg.csv'.format(data_dir))

    pos_samples_l = [pos_sample + [1] for pos_sample in pos_samples]
    neg_samples_l = [neg_sample + [0] for neg_sample in neg_samples]

    all_samples_labelled = pos_samples_l + neg_samples_l

    X = np.array([sample[:-1] for sample in all_samples_labelled])
    y = np.array([sample[-1] for sample in all_samples_labelled])
    classifiers = []
    res = {}  # Dictionary for results

    # LIST OF CLASSIFIERS USED, SEE USED PARAMETERS IN validation.py
    # Seed (random_state) fixed to 23 (chosen arbitrarily) for reproducibility

    classifiers.append((None, 'Baseline'))
    model1 = xgboost.XGBClassifier(random_state=23)
    classifiers.append((model1, 'XGB'))
    model2 = svm.SVC(probability=True, random_state=23)
    classifiers.append((model2, 'SVC'))
    model3 = tree.DecisionTreeClassifier(random_state=23)
    classifiers.append((model3, 'Decision Tree'))
    model4 = RandomForestClassifier(random_state=23)
    classifiers.append((model4, 'Random Forest'))
    model5 = GaussianNB()
    classifiers.append((model5, 'Gaussian Naive Bayes'))
    model6 = SGDClassifier(random_state=23)
    classifiers.append((model6, 'SGD'))
    model7 = KNeighborsClassifier()
    classifiers.append((model7, 'K-neighbors'))
    model8 = BaggingClassifier(tree.DecisionTreeClassifier(random_state=23),
                               random_state=23)
    classifiers.append((model8, 'Bagged Decision Trees 1'))
    model9 = BaggingClassifier(tree.DecisionTreeClassifier(random_state=23),
                               random_state=23)
    classifiers.append((model9, 'Bagged Decision Trees 2'))
    model10 = ExtraTreesClassifier(random_state=23)
    classifiers.append((model10, 'Extra Trees'))
    model11 = AdaBoostClassifier(random_state=23)
    classifiers.append((model11, 'AdaBoost 1'))
    model12 = AdaBoostClassifier(random_state=23)
    classifiers.append((model12, 'AdaBoost 2'))
    model13 = GradientBoostingClassifier(random_state=23)
    classifiers.append((model13, 'Gradient Boosting'))

    # Random train and test splits using 80% as training set and 20% as test set
    # Seed (random_state) fixed to 23 (chosen arbitrarily) for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        shuffle=True,
                                                        random_state=23)

    opt_classifiers = validate(X_train, y_train, classifiers)

    for clf_tup in opt_classifiers:
        if clf_tup[1] == 'Baseline':
            # Baseline prediction, all positive or all negative
            # Always negative in our case since there are fewer SBRs than NSBRs
            y_pred = [0 if baseline == 'neg' else 1 for _ in range(len(X_test))]
        else:
            clf = clf_tup[0]  # Already tuned classifier (parameters in validation.py)
            clf.fit(X_train, y_train)  # TRAINING
            y_pred = clf.predict(X_test)  # TESTING
            # Probabilities for the precision/recall curve
            y_score = np.array([p[1] for p in clf.predict_proba(X_test)])

        cm = confusion_matrix(y_test, y_pred)
        clf_name = clf_tup[1]  # Classifier name to identify which classifier is used
        res[clf_name] = {}

        res[clf_name]['TN'] = cm[0][0]
        res[clf_name]['FP'] = cm[0][1]
        res[clf_name]['FN'] = cm[1][0]
        res[clf_name]['TP'] = cm[1][1]
        res[clf_name]['acc'] = accuracy_score(y_test, y_pred)

        if clf_tup[1] == 'Baseline':
            if baseline == 'neg':
                res[clf_name]['prec'] = float('NaN')  # no true positives, no false positives
                res[clf_name]['rec'] = 0.0  # no true positives
                res[clf_name]['fsco'] = float('NaN')
            else:
                res[clf_name]['prec'] = cm[1][1] / (cm[1][1] + cm[0][1])  # TP / (TP + FP)
                res[clf_name]['rec'] = 1.0  # no false negatives
                res[clf_name]['fsco'] = 2 / (1 + 1 / res[clf_name]['prec'])  # Harmonic mean
        else:
            prec, rec, fsco, _ = precision_recall_fscore_support(
                y_test, y_pred, pos_label=1, average='binary')
            res[clf_name]['prec'] = prec  # precision computed by scikit-learn
            res[clf_name]['rec'] = rec  # recall computed by scikit-learn
            res[clf_name]['fsco'] = fsco  # F-score computed by scikit-learn

            # Average precision computed by scikit-learn
            average_precision = average_precision_score(y_test, y_score)
            # Precision/recall values to be plotted
            precision, recall, _ = precision_recall_curve(y_test, y_score)

            # PLOTTING PRECISION RECALL CURVE

            step_kwargs = {'step': 'post'}
            plt.step(recall, precision, color='b', alpha=0.2, where='post')
            plt.fill_between(recall,
                             precision,
                             alpha=0.2,
                             color='b',
                             **step_kwargs)

            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.ylim([0.0, 1.05])
            plt.xlim([0.0, 1.0])
            plt.title('{}: 2-class Precision-Recall curve: AP={:0.2f}'.format(
                clf_name, average_precision))
            # plt.show()  # Uncomment to show plots

    # Displaying results
    print('\n{:32s} {:4s} {:4s} {:4s}  {:4s} {:10s}{:10s}    {:10s}{:10s}'.
          format('', 'TP', 'FP', 'FN', 'TN', 'Accuracy', 'Precision', 'Recall',
                 'F-score'))
    for clf in res.keys():
        curr = res[clf]
        print('{:30s} {:4d} {:4d} {:4d}  {:4d} {:10f} {:10f} {:10f} {:10f}'.
              format(clf, curr['TP'], curr['FP'], curr['FN'], curr['TN'],
                     curr['acc'], curr['prec'], curr['rec'], curr['fsco']))
Example #15
        subcategories = [
            E.category(E.id(subcategory.id), E.title(subcategory.title))
            for subcategory in programme['subcategories']
            if subcategory.parent is category
        ]

        if subcategories:
            category_node.append(E.categories(*subcategories))

        node.append(category_node)

    return node


programme_dicts, categories, subcategories = parse_csv()


for programme in programme_dicts:
    with open('xml/{}.xml'.format(programme['pid']), 'w') as xml_file:
        xml = ET.tostring(
            E.programme(
                E.pid(programme['pid']),
                E.complete_title(unicode(programme['complete_title'], 'utf-8')),
                E.media_type(programme['media_type']),
                E.masterbrand(programme['masterbrand']),
                E.brand_pid(programme['brand_pid']),
                E.is_clip(programme['is_clip']),
Example #16
    def test_parse_csv(self):
        # Smoke test: parse_csv should accept every fixture file without raising.
        for filename in self.FILES:
            with open(filename) as csv_file:
                parse.parse_csv(csv_file)