import random
from typing import Tuple

# Dataset, Vector, load_model, cluster_by_classifier, write_clusters, val,
# get_similarity, sqr_error and gradient are assumed to come from the
# surrounding project.


def evaluate_cluster(data_path, model_path, real_path, pred_path):
    """Cluster the rows of data_path with the classifier stored at
    model_path, write the predicted clusters to pred_path, and print
    precision/recall/F1 against the gold clustering in real_path."""
    dataset = Dataset.from_csv(data_path)
    rows = dataset.rows
    clf = load_model(model_path)
    clusters = cluster_by_classifier(rows, clf)

    write_clusters(clusters, pred_path)
    precision, recall, f1 = val(pred_path, real_path)
    print(f'precision: {precision}')
    print(f'recall: {recall}')
    print(f'f1: {f1}')
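

# A minimal sketch of the kind of pairwise scoring val could perform.
# Everything here is an assumption for illustration: the real val lives
# elsewhere in the project, and this sketch presumes each file holds one
# whitespace-separated '<ruid> <cuid>' pair per line.
def _pairwise_val_sketch(pred_path, real_path):
    def pair_set(path):
        by_cluster = {}
        with open(path) as f:
            for line in f:
                ruid, cuid = line.split()
                by_cluster.setdefault(cuid, []).append(ruid)
        # All unordered record pairs that share a cluster.
        return {
            frozenset((a, b))
            for members in by_cluster.values()
            for i, a in enumerate(members)
            for b in members[i + 1:]
        }

    pred, real = pair_set(pred_path), pair_set(real_path)
    hits = len(pred & real)
    precision = hits / len(pred) if pred else 0.0
    recall = hits / len(real) if real else 0.0
    f1 = 2 * precision * recall / (precision + recall) if hits else 0.0
    return precision, recall, f1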


def _prepare(record_filename, data_filename,
             positive_rate=0.5, data_rate=1.0):
    """Sample labelled record pairs from the gold clusters in
    record_filename and write one '<ruid> <ruid> <label> <features>' line
    per pair to data_filename.

    positive_rate and data_rate were free names in the original; they are
    surfaced here as parameters with assumed defaults.
    """
    dataset = Dataset.from_csv(record_filename)
    clusters = list(dataset.rows_oc.values())
    # Every unordered pair inside a cluster is a potential positive example.
    all_positive_count = sum(len(v) * (len(v) - 1) // 2 for v in clusters)
    all_count = int(all_positive_count / positive_rate * data_rate)
    n_entities = len(clusters)
    n_positive = n_negative = 0

    with open(data_filename, 'w') as data_out:
        for i in range(all_count):
            if i % 1000 == 0:
                print(f'[{data_filename}] Progress: {i}')
            if random.random() < positive_rate:
                # Positive pair: two records drawn from the same cluster.
                label = 1
                while True:
                    e = random.randint(0, n_entities - 1)
                    if len(clusters[e]) > 1:
                        break
                items = random.sample(clusters[e], 2)
                assert items[0].cuid == items[1].cuid
                n_positive += 1
            else:
                # Negative pair: one record from each of two distinct clusters.
                label = 0
                items = [random.choice(x) for x in random.sample(clusters, 2)]
                assert items[0].cuid != items[1].cuid
                n_negative += 1
            feature = get_similarity(*items)
            data_out.write(
                f'{items[0].ruid} {items[1].ruid} {label} {" ".join(map(str, feature))}\n'
            )
    print(
        f'[{data_filename}] positive: {n_positive}, negative: {n_negative}, total: {n_positive + n_negative}'
    )
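
# Example use (hypothetical file names):
#   _prepare('gold_records.csv', 'train_pairs.txt')
#   _prepare('gold_records.csv', 'dev_pairs.txt', positive_rate=0.3)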

def fit_linear(dataset: Dataset,
               lambdaa=0.1,
               max_iter=10000,
               threshold=0.11) -> Tuple[Vector, float, int]:
    """Fit a linear model to dataset by batch gradient descent.

    lambdaa is the learning rate; iteration stops after max_iter steps or
    once the squared error changes by less than threshold between steps.
    Returns the fitted coefficient vector, its squared error and the
    number of iterations performed.
    """
    # Arbitrary starting point: (0, 1, ..., n - 1), one coefficient per feature.
    rho = Vector(*range(len(dataset.features)))
    error = sqr_error(dataset, rho)
    prev_error = 0
    iter_count = 0
    while iter_count < max_iter and (prev_error == 0
                                     or abs(prev_error - error) > threshold):
        # Step against the gradient of the squared error.
        rho = rho - lambdaa * gradient(dataset, rho)
        prev_error, error = error, sqr_error(dataset, rho)
        iter_count += 1
    return rho, error, iter_count
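

# Minimal sketches of the two helpers fit_linear assumes; the real
# sqr_error and gradient are defined elsewhere in the project. The row
# API used here (row.features, row.target, an indexable Vector with a
# dot product) is a guess for illustration only.
def _sqr_error_sketch(dataset, rho):
    # Sum of squared residuals of the linear model rho over all rows.
    return sum((row.features.dot(rho) - row.target) ** 2
               for row in dataset.rows)


def _gradient_sketch(dataset, rho):
    # Partial derivative w.r.t. rho_j: 2 * sum(residual * x_j) over rows.
    residuals = [(row.features.dot(rho) - row.target, row.features)
                 for row in dataset.rows]
    return Vector(*(sum(2 * r * x[j] for r, x in residuals)
                    for j in range(len(dataset.features))))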


if __name__ == "__main__":
    csv_str = """V_lead,V_iron,V_aluminium,mass
0.3,0.2,0.1,5.246
0.1,0.1,0.4,3.001
0.7,0.3,0.5,11.649
0.4,0.6,0.11,9.5574"""

    # Recover the per-material densities that explain the measured masses.
    rho, sqr_err, nb_iter = fit_linear(Dataset.from_csv(csv_str))

    print("Densities:", rho)
    print("Nb iterations:", nb_iter)
    print("Square error:", sqr_err)