Example #1
0
 def predict(self, rows_csv, count, result_out, id_offset=True):
     '''Sample `count` completions for each input row and write them as CSV.

     Parameters:
         rows_csv - path to a (possibly compressed) CSV of partial rows;
             empty cells are treated as unobserved and are sampled,
             nonempty cells are encoded and used as conditioning values
         count - number of samples to draw per input row
         result_out - path of the (possibly compressed) CSV to write
         id_offset - when True, column 0 of each row is a row id that is
             stripped before encoding and copied onto every output row
     '''
     with open_compressed(rows_csv, 'rb') as fin:
         with open_compressed(result_out, 'w') as fout:
             reader = csv.reader(fin)
             writer = csv.writer(fout)
             # next(reader) instead of reader.next(): works on both
             # Python 2 and Python 3 iterators.
             feature_names = list(next(reader))
             writer.writerow(feature_names)
             name_to_pos = {name: i for i, name in enumerate(feature_names)}
             # Build per-feature (column position, encoder) schema once,
             # outside the row loop; decoders are kept by column position.
             pos_to_decode = {}
             schema = []
             for encoder in self.encoders:
                 pos = name_to_pos.get(encoder['name'])
                 encode = load_encoder(encoder)
                 decode = load_decoder(encoder)
                 if pos is not None:
                     pos_to_decode[pos] = decode
                 schema.append((pos, encode))
             for row in reader:
                 conditioning_row = []
                 to_sample = []
                 if id_offset:
                     row_id = row.pop(0)
                 for pos, encode in schema:
                     value = None if pos is None else row[pos].strip()
                     observed = bool(value)
                     to_sample.append(not observed)
                     if observed:
                         conditioning_row.append(encode(value))
                     else:
                         conditioning_row.append(None)
                 samples = self.query_server.sample(
                     to_sample,
                     conditioning_row,
                     count)
                 for sample in samples:
                     out_row = [row_id] if id_offset else []
                     for name in feature_names:
                         pos = name_to_pos[name]
                         # BUG FIX: decode the sampled value back into its
                         # CSV string form. The original looked up the
                         # decoder here but never applied it, writing raw
                         # encoded values to the output file.
                         decode = pos_to_decode[pos]
                         out_row.append(decode(sample[pos]))
                     writer.writerow(out_row)
Example #2
0
 def __init__(self, query_server, encoding=None, debug=False):
     '''Cache store paths, encoders, transforms, and per-feature lookup
     tables used to answer queries against `query_server`.

     `encoding` defaults to the ingest encoding file found under the
     query server's root.
     '''
     self._query_server = query_server
     self._debug = debug
     self._rowid_map = None
     self._paths = loom.store.get_paths(query_server.root)
     if encoding is None:
         encoding = self._paths['ingest']['encoding']
     self._encoders = json_load(encoding)
     self._transform = loom.transforms.load_transforms(
         self._paths['ingest']['transforms'])
     names = [encoder['name'] for encoder in self._encoders]
     self._feature_names = names
     self._feature_set = frozenset(names)
     self._name_to_pos = dict(
         (name, pos) for pos, name in enumerate(names))
     self._name_to_decode = {}
     self._name_to_encode = {}
     for encoder in self._encoders:
         self._name_to_decode[encoder['name']] = load_decoder(encoder)
         self._name_to_encode[encoder['name']] = load_encoder(encoder)
Example #3
0
 def __init__(self, query_server, encoding=None, debug=False):
     '''Load encoders and transforms and precompute feature lookup tables.

     When `encoding` is None, it is read from the ingest encoding file
     under the query server's root.
     '''
     self._paths = loom.store.get_paths(query_server.root)
     self._query_server = query_server
     if encoding is None:
         encoding = self._paths['ingest']['encoding']
     self._encoders = json_load(encoding)
     self._transform = loom.transforms.load_transforms(
         self._paths['ingest']['transforms'])
     # Build all per-feature tables in a single pass over the encoders.
     feature_names = []
     name_to_pos = {}
     name_to_decode = {}
     name_to_encode = {}
     for pos, encoder in enumerate(self._encoders):
         name = encoder['name']
         feature_names.append(name)
         name_to_pos[name] = pos
         name_to_decode[name] = load_decoder(encoder)
         name_to_encode[name] = load_encoder(encoder)
     self._feature_names = feature_names
     self._feature_set = frozenset(feature_names)
     self._name_to_pos = name_to_pos
     self._name_to_decode = name_to_decode
     self._name_to_encode = name_to_encode
     self._rowid_map = None
     self._debug = debug
Example #4
0
    'tags': ['', 'big_data machine_learning platform'],
}
# Derive an optional_* variant (which also allows blank) of every example
# type. Iterate over a snapshot of items(): the loop inserts new keys, and
# mutating a dict while iterating its view raises RuntimeError on Python 3
# (on Python 2, items() already returned a list, so this is a no-op there).
for fluent_type, values in list(EXAMPLE_VALUES.items()):
    EXAMPLE_VALUES['optional_{}'.format(fluent_type)] = [''] + values
EXAMPLE_VALUES['id'] = ['any unique string can serve as an id']

# Map fluent (user-facing) feature type names to basic loom model codes.
FLUENT_TO_BASIC = dict(
    boolean='bb',
    categorical='dd',
    unbounded_categorical='dpd',
    count='gp',
    real='nich',
)

# Module-level encoder/decoder for bare boolean ('bb' model) cells, built
# once at import time so row-level code need not rebuild them per value.
encode_bool = load_encoder({'model': 'bb'})
decode_bool = load_decoder({'model': 'bb'})


def get_row_dict(header, row):
    '''Return a {column_name: value} dict for one CSV row.

    By convention, empty strings are omitted from the result dict.
    '''
    # Builtin zip instead of Python-2-only itertools.izip: results are
    # identical, and header/row are small so laziness is irrelevant.
    return {key: value for key, value in zip(header, row) if value}


class TransformSequence(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def forward_set(self, feature_set):
        result = set(feature_set)
        for t in self.transforms:
            t.forward_set(result)
Example #5
0
    'tags': ['', 'big_data machine_learning platform'],
}
# Derive an optional_* variant (which also allows blank) of every example
# type. Iterate over a snapshot of items(): the loop inserts new keys, and
# mutating a dict while iterating its view raises RuntimeError on Python 3
# (on Python 2, items() already returned a list, so this is a no-op there).
for fluent_type, values in list(EXAMPLE_VALUES.items()):
    EXAMPLE_VALUES['optional_{}'.format(fluent_type)] = [''] + values
EXAMPLE_VALUES['id'] = ['any unique string can serve as an id']

# Map fluent (user-facing) feature type names to basic loom model codes.
FLUENT_TO_BASIC = dict(
    boolean='bb',
    categorical='dd',
    unbounded_categorical='dpd',
    count='gp',
    real='nich',
)

# Module-level encoder/decoder for bare boolean ('bb' model) cells, built
# once at import time so row-level code need not rebuild them per value.
encode_bool = load_encoder({'model': 'bb'})
decode_bool = load_decoder({'model': 'bb'})


def get_row_dict(header, row):
    '''Return a {column_name: value} dict for one CSV row.

    By convention, empty strings are omitted from the result dict.
    '''
    # Builtin zip instead of Python-2-only itertools.izip: results are
    # identical, and header/row are small so laziness is irrelevant.
    return {key: value for key, value in zip(header, row) if value}


class TransformSequence(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def forward_set(self, feature_set):
        result = set(feature_set)
        for t in self.transforms:
            t.forward_set(result)