Example 1
def lambda_handler(event, context):
    uuid = event['campaign_uuid']
    type_name = event['type']
    type_id = type_name.replace(' ', '_')

    campaign = fetch_campaign(campaign_path(uuid))
    for type_key in campaign['types']:
        if campaign['types'][type_key]['type'] == type_name:
            typee = campaign['types'][type_key]

    download_overpass_file(uuid, type_id)

    xml_file = open('/tmp/{type_id}.xml'.format(type_id=type_id), 'r')

    tag_name = typee['feature'].split('=')[0]
    start_date = calendar.timegm(
        datetime.datetime.strptime(campaign['start_date'],
                                   '%Y-%m-%d').timetuple()) * 1000
    end_date = calendar.timegm(
        datetime.datetime.strptime(campaign['end_date'],
                                   '%Y-%m-%d').timetuple()) * 1000

    sorted_user_list = osm_object_contributions(xml_file, tag_name, start_date,
                                                end_date)

    save_data(uuid, type_id, sorted_user_list)
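Example 1 converts the campaign's start and end dates to epoch milliseconds before passing them to osm_object_contributions. A minimal, self-contained sketch of that conversion (the function name here is illustrative, not from the source):

import calendar
import datetime

def date_to_epoch_ms(date_string):
    # calendar.timegm() treats the parsed date as UTC and returns seconds
    # since the epoch; the handler scales that to milliseconds.
    parsed = datetime.datetime.strptime(date_string, '%Y-%m-%d')
    return calendar.timegm(parsed.timetuple()) * 1000

# date_to_epoch_ms('2019-03-01') -> 1551398400000 (UTC midnight, in ms)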
Example 2
def test_raster_warping_does_not_overclip_source():
    lyrSrs = "+init=epsg:32630"
    mapSrs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    lyr = mapnik.Layer('dataraster', lyrSrs)
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        lyr.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif',
            band=1,
        )
        sym = mapnik.RasterSymbolizer()
        sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                               mapnik.Color(255, 255, 0))
        rule = mapnik.Rule()
        rule.symbols.append(sym)
        style = mapnik.Style()
        style.rules.append(rule)
        _map = mapnik.Map(256, 256, mapSrs)
        _map.background = mapnik.Color('white')
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        _map.zoom_to_box(mapnik.Box2d(3, 42, 4, 43))

        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)
        # save a png somewhere so we can see it
        save_data('test_raster_warping_does_not_overclip_source.png',
                  im.tostring('png'))
        assert im.view(0, 200, 1, 1).tostring() == '\xff\xff\x00\xff'
Example 3
    def grid_search(self, kwargs):

        make_dir("../evaluations")
        wordNgrams = kwargs["wordNgrams"]
        bucket = kwargs["bucket"]
        lr = kwargs["lr"]
        dim = kwargs["dim"]
        epoch = kwargs["epoch"]
        loss = kwargs["loss"]

        args = product(wordNgrams, bucket, lr, dim, epoch, loss)

        for combinations in args:

            kwargs["wordNgrams"] = combinations[0]
            kwargs["bucket"] = int(combinations[1])
            kwargs["lr"] = combinations[2]
            kwargs["dim"] = combinations[3]
            kwargs["epoch"] = combinations[4]
            kwargs["loss"] = combinations[5]

            parameters = " ".join(
                map(str, [
                    kwargs["wordNgrams"], kwargs["bucket"], kwargs["lr"],
                    kwargs["dim"], kwargs["epoch"], kwargs["loss"]
                ]))

            self.trainClassifier(**kwargs)
            results = "{}\n{}\n\n".format(parameters,
                                          self.testClassifier(kwargs["name"]))
            save_data(directory="../evaluations",
                      name="results.txt",
                      docs=results,
                      mode="a")
def test_raster_with_alpha_blends_correctly_with_background():
    WIDTH = 500
    HEIGHT = 500

    map = mapnik.Map(WIDTH, HEIGHT)
    WHITE = mapnik.Color(255, 255, 255)
    map.background = WHITE

    style = mapnik.Style()
    rule = mapnik.Rule()
    symbolizer = mapnik.RasterSymbolizer()
    symbolizer.scaling = mapnik.scaling_method.BILINEAR

    rule.symbols.append(symbolizer)
    style.rules.append(rule)

    map.append_style('raster_style', style)

    map_layer = mapnik.Layer('test_layer')
    filepath = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        map_layer.datasource = mapnik.Gdal(file=filepath)
        map_layer.styles.append('raster_style')
        map.layers.append(map_layer)

        map.zoom_all()

        mim = mapnik.Image(WIDTH, HEIGHT)

        mapnik.render(map, mim)
        save_data('test_raster_with_alpha_blends_correctly_with_background.png',
                  mim.tostring('png'))
        imdata = mim.tostring()
        # All white is expected
        assert contains_word('\xff\xff\xff\xff', imdata)
Example 5
def test_raster_warping_does_not_overclip_source():
    lyrSrs = "+init=epsg:32630"
    mapSrs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    lyr = mapnik.Layer('dataraster', lyrSrs)
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        lyr.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif',
            band=1,
        )
        sym = mapnik.RasterSymbolizer()
        sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                               mapnik.Color(255, 255, 0))
        rule = mapnik.Rule()
        rule.symbols.append(sym)
        style = mapnik.Style()
        style.rules.append(rule)
        _map = mapnik.Map(256, 256, mapSrs)
        _map.background = mapnik.Color('white')
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        _map.zoom_to_box(mapnik.Box2d(3, 42, 4, 43))

        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)
        # save a png somewhere so we can see it
        save_data('test_raster_warping_does_not_overclip_source.png',
                  im.tostring('png'))
        assert im.view(0, 200, 1, 1).tostring() == '\xff\xff\x00\xff'
Example 6
def test_raster_warping():
    lyrSrs = "+init=epsg:32630"
    mapSrs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    lyr = mapnik.Layer('dataraster', lyrSrs)
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        lyr.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif',
            band=1,
        )
        sym = mapnik.RasterSymbolizer()
        sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                               mapnik.Color(255, 255, 0))
        rule = mapnik.Rule()
        rule.symbols.append(sym)
        style = mapnik.Style()
        style.rules.append(rule)
        _map = mapnik.Map(256, 256, mapSrs)
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        prj_trans = mapnik.ProjTransform(mapnik.Projection(mapSrs),
                                         mapnik.Projection(lyrSrs))
        _map.zoom_to_box(prj_trans.backward(lyr.envelope()))

        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)
        # save a png somewhere so we can see it
        save_data('test_raster_warping.png', im.tostring('png'))
        imdata = im.tostring()
        assert contains_word('\xff\xff\x00\xff', imdata)
Example 7
def main(event, context):
    logger.info('got event: {}'.format(event))
    uuid = event['campaign_uuid']
    type_name = event['type']
    type_id = type_name.replace(' ', '_')

    campaign = fetch_campaign(
        campaign_path=campaign_path(uuid))
    for type_key in campaign['types']:
        if campaign['types'][type_key]['type'] == type_name:
            typee = campaign['types'][type_key]

    download_overpass_file(uuid, type_id)
    xml_file = open('/tmp/{type_id}.xml'.format(type_id=type_id), 'r')

    parser = CountFeatureParser(typee['feature'])

    try:
        xml.sax.parse(xml_file, parser)
    except xml.sax.SAXParseException:
        print('FAIL')

    output = {
        'type_id': type_id,
        'type_name': type_name,
        'piechart': to_piechart(parser.count)
    }

    save_data(uuid, type_id, output)
Example 8
def generate_challenge(key, mac_key, challenge_size=32, bytes_per_hash=1, 
                       hash_function="sha256", unencrypted_data='',
                       answer=bytes()):
    """ Create a challenge that only the holder of key should be able to solve.
        
        mac_key is required to assure integrity and authenticity of the 
        challenge to the client. 
        
        challenge_size is the total amount of data the client must crack.
        A random challenge of challenge_size is generated, and separated into
        challenge_size / bytes_per_hash subchallenges. The time taken to crack 
        a single subchallenge is O(2**n) (? not sure!), where n is the number 
        of bytes_per_hash. 
        
        hash_function is a string name of an algorithm available in the hashlib module
        
        unencrypted_data is an optional string of data to be packaged with the challenge.
        The data is not kept confidential, but possesses integrity and authenticity
        because of the message authentication code over the entire package.
        
        answer is an optional string, that when supplied, is used instead of a
        random challenge. If supplied, the challenge_size argument has no effect. """        
    answer = answer or random._urandom(challenge_size)
    challenge = encrypt(answer, key, hmac_factory(hash_function),
                        input_block_size=bytes_per_hash)
    package = save_data(challenge, bytes_per_hash, unencrypted_data)
    return (save_data(generate_mac(mac_key, package, hash_function), hash_function, package), 
            answer)
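The docstring's cost claim follows from simple counting: each subchallenge hides bytes_per_hash unknown bytes, so a brute-force search must try up to 256**bytes_per_hash candidates. A small illustration of how that work scales, plain arithmetic independent of the encrypt/hmac_factory helpers above:

# Each extra byte per hash multiplies the per-subchallenge work by 256.
for n in range(1, 5):
    print("bytes_per_hash={}: up to {:,} guesses per subchallenge".format(
        n, 256 ** n))
# bytes_per_hash=1: up to 256 guesses per subchallenge
# bytes_per_hash=2: up to 65,536 guesses per subchallenge
# ...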
Example 9
def main(event, context):
    logger.info('got event: {}'.format(event))
    uuid = event['campaign_uuid']
    type_name = event['type']
    type_id = type_name.replace(' ', '_')

    campaign = fetch_campaign(campaign_path(uuid))
    for type_key in campaign['types']:
        if campaign['types'][type_key]['type'] == type_name:
            typee = campaign['types'][type_key]

    logger.info(typee['tags'])
    required_tags = fix_tags(typee['tags'])
    logger.info(required_tags)

    render_data_path = build_render_data_path(
        campaign_path=campaign_path(uuid), type_id=type_id)

    download_overpass_file(uuid, type_id)

    if 'element_type' in typee:
        element_type = typee['element_type']
    else:
        element_type = None

    xml_file = open('/tmp/{type_id}.xml'.format(type_id=type_id), 'r')
    parser = FeatureCompletenessParser(required_tags, render_data_path,
                                       element_type)

    try:
        xml.sax.parse(xml_file, parser)
    except xml.sax.SAXParseException:
        print('FAIL')
        parser.endDocument()

    processed_data = {
        'type_id': type_id,
        'type_name': type_name,
        'percentage': compute_completeness_pct(
            features_collected=parser.features_collected,
            features_completed=parser.features_completed),
        'features_collected': parser.features_collected,
        'features_completed': parser.features_completed,
        'checked_attributes': list(required_tags.keys()),
        'geojson_files_count': parser.geojson_file_manager.count,
        'errors_files_count': parser.errors_file_manager.count,
        'error_ids': parser.error_ids
    }
    save_data(uuid, type_id, processed_data)
    invoke_download_errors(uuid, type_name)
    invoke_render_feature(uuid, type_name)
    invoke_process_make_vector_tiles(uuid, type_name)
Example 10
def test_raster_warping():
    lyrSrs = "+init=epsg:32630"
    mapSrs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    lyr = mapnik.Layer('dataraster', lyrSrs)
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        lyr.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif',
            band=1,
        )
        sym = mapnik.RasterSymbolizer()
        sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                               mapnik.Color(255, 255, 0))
        rule = mapnik.Rule()
        rule.symbols.append(sym)
        style = mapnik.Style()
        style.rules.append(rule)
        _map = mapnik.Map(256, 256, mapSrs)
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        prj_trans = mapnik.ProjTransform(mapnik.Projection(mapSrs),
                                         mapnik.Projection(lyrSrs))
        _map.zoom_to_box(prj_trans.backward(lyr.envelope()))

        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)
        # save a png somewhere so we can see it
        save_data('test_raster_warping.png', im.tostring('png'))
        imdata = im.tostring()
        assert contains_word('\xff\xff\x00\xff', imdata)
Example 11
def test_multi_tile_policy():
    srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    lyr = mapnik.Layer('raster')
    if 'raster' in mapnik.DatasourceCache.instance().plugin_names():
        lyr.datasource = mapnik.Raster(
            file='../data/raster_tiles/${x}/${y}.tif',
            lox=-180,
            loy=-90,
            hix=180,
            hiy=90,
            multi=1,
            tile_size=256,
            x_width=2,
            y_width=2,
        )
        lyr.srs = srs
        _map = mapnik.Map(256, 256, srs)
        style = mapnik.Style()
        rule = mapnik.Rule()
        sym = mapnik.RasterSymbolizer()
        rule.symbols.append(sym)
        style.rules.append(rule)
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        _map.zoom_to_box(lyr.envelope())
        
        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)
        
        save_data('test_multi_tile_policy.png', im.tostring('png'))
    
        # test green chunk
        eq_(im.view(0,64,1,1).tostring(), '\x00\xff\x00\xff')
        eq_(im.view(127,64,1,1).tostring(), '\x00\xff\x00\xff')
        eq_(im.view(0,127,1,1).tostring(), '\x00\xff\x00\xff')
        eq_(im.view(127,127,1,1).tostring(), '\x00\xff\x00\xff')
    
        # test blue chunk
        eq_(im.view(128,64,1,1).tostring(), '\x00\x00\xff\xff')
        eq_(im.view(255,64,1,1).tostring(), '\x00\x00\xff\xff')
        eq_(im.view(128,127,1,1).tostring(), '\x00\x00\xff\xff')
        eq_(im.view(255,127,1,1).tostring(), '\x00\x00\xff\xff')
    
        # test red chunk
        eq_(im.view(0,128,1,1).tostring(), '\xff\x00\x00\xff')
        eq_(im.view(127,128,1,1).tostring(), '\xff\x00\x00\xff')
        eq_(im.view(0,191,1,1).tostring(), '\xff\x00\x00\xff')
        eq_(im.view(127,191,1,1).tostring(), '\xff\x00\x00\xff')
    
        # test magenta chunk
        eq_(im.view(128,128,1,1).tostring(), '\xff\x00\xff\xff')
        eq_(im.view(255,128,1,1).tostring(), '\xff\x00\xff\xff')
        eq_(im.view(128,191,1,1).tostring(), '\xff\x00\xff\xff')
        eq_(im.view(255,191,1,1).tostring(), '\xff\x00\xff\xff')
Example 12
def test_multi_tile_policy():
    srs = '+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs'
    lyr = mapnik.Layer('raster')
    if 'raster' in mapnik.DatasourceCache.plugin_names():
        lyr.datasource = mapnik.Raster(
            file = '../data/raster_tiles/${x}/${y}.tif',
            lox = -180,
            loy = -90,
            hix = 180,
            hiy = 90,
            multi = 1,
            tile_size = 256,
            x_width = 2,
            y_width = 2
            )
        lyr.srs = srs
        _map = mapnik.Map(256, 256, srs)
        style = mapnik.Style()
        rule = mapnik.Rule()
        sym = mapnik.RasterSymbolizer()
        rule.symbols.append(sym)
        style.rules.append(rule)
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        _map.zoom_to_box(lyr.envelope())

        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)

        save_data('test_multi_tile_policy.png', im.tostring('png'))

        # test green chunk
        eq_(im.view(0,64,1,1).tostring(), '\x00\xff\x00\xff')
        eq_(im.view(127,64,1,1).tostring(), '\x00\xff\x00\xff')
        eq_(im.view(0,127,1,1).tostring(), '\x00\xff\x00\xff')
        eq_(im.view(127,127,1,1).tostring(), '\x00\xff\x00\xff')

        # test blue chunk
        eq_(im.view(128,64,1,1).tostring(), '\x00\x00\xff\xff')
        eq_(im.view(255,64,1,1).tostring(), '\x00\x00\xff\xff')
        eq_(im.view(128,127,1,1).tostring(), '\x00\x00\xff\xff')
        eq_(im.view(255,127,1,1).tostring(), '\x00\x00\xff\xff')

        # test red chunk
        eq_(im.view(0,128,1,1).tostring(), '\xff\x00\x00\xff')
        eq_(im.view(127,128,1,1).tostring(), '\xff\x00\x00\xff')
        eq_(im.view(0,191,1,1).tostring(), '\xff\x00\x00\xff')
        eq_(im.view(127,191,1,1).tostring(), '\xff\x00\x00\xff')

        # test magenta chunk
        eq_(im.view(128,128,1,1).tostring(), '\xff\x00\xff\xff')
        eq_(im.view(255,128,1,1).tostring(), '\xff\x00\xff\xff')
        eq_(im.view(128,191,1,1).tostring(), '\xff\x00\xff\xff')
        eq_(im.view(255,191,1,1).tostring(), '\xff\x00\xff\xff')
Example 13
def settings_menu(prevMenu):
	STANDARD_MENU.reset(False)
	STANDARD_MENU.allow(AudioBox.ALL)
	STANDARD_MENU.add_callback_function(utilities.tts_change_volume, 'volume', False)
	STANDARD_MENU.add_callback_function(utilities.tts_change_rate, 'rate', False)
	STANDARD_MENU.add_item('save and return to previous menu', False)
	choice = STANDARD_MENU.run('settings', False)
	if choice == 3:
		utilities.save_data([ utilities.SPEECH_VOLUME, utilities.SPEECH_RATE ], 'settings.dat')
		if prevMenu == 'start':
			start_menu()
Example 14
def test_renders_with_agg():
    sym = mapnik2.GlyphSymbolizer("DejaVu Sans Condensed",
                                  mapnik2.Expression("'í'"))
    sym.allow_overlap = True
    sym.angle = mapnik2.Expression("[azimuth]+90") #+90 so the top of the glyph points upwards
    sym.size = mapnik2.Expression("[value]")
    sym.color = mapnik2.Expression("'#ff0000'")

    _map = create_map_and_append_symbolyzer(sym)
    im = mapnik2.Image(_map.width,_map.height)
    mapnik2.render(_map, im)
    save_data('agg_glyph_symbolizer.png', im.tostring('png'))
    assert contains_word('\xff\x00\x00\xff', im.tostring())
Example 15
def test_renders_with_agg():
    sym = mapnik2.GlyphSymbolizer("DejaVu Sans Condensed",
                                  mapnik2.Expression("'í'"))
    sym.allow_overlap = True
    sym.angle = mapnik2.Expression(
        "[azimuth]+90")  #+90 so the top of the glyph points upwards
    sym.size = mapnik2.Expression("[value]")
    sym.color = mapnik2.Expression("'#ff0000'")

    _map = create_map_and_append_symbolyzer(sym)
    im = mapnik2.Image(_map.width, _map.height)
    mapnik2.render(_map, im)
    save_data('agg_glyph_symbolizer.png', im.tostring('png'))
    assert contains_word('\xff\x00\x00\xff', im.tostring())
Example 16
    def save(self):
        # Holds the save data
        save_data = dict()

        # Loop over the dialogues and utterances in the model
        dialogues = []
        dialogue_index = 0
        for dialogue in self.model.dialogues:

            tmp_dialogue = dict()
            dialogue_index += 1

            utterances = []
            for utterance in dialogue.utterances:

                tmp_utterance = dict()

                # Add speaker, text and labels to utterance
                tmp_utterance['speaker'] = utterance.speaker
                tmp_utterance['text'] = utterance.text
                tmp_utterance['ap_label'] = utterance.ap_label
                tmp_utterance['da_label'] = utterance.da_label

                # Add slots to utterance if they exist
                if utterance.slots is not None:
                    tmp_utterance['slots'] = utterance.slots

                # Add to utterance list
                utterances.append(tmp_utterance)

            # Add id, number of utterances, utterance and scenario to dialogue
            tmp_dialogue['dialogue_id'] = self.model.dataset + "_" + str(
                dialogue_index)
            tmp_dialogue['num_utterances'] = dialogue.num_utterances
            tmp_dialogue['utterances'] = utterances

            # Add scenario to dialogue if it exists
            if dialogue.scenario is not None:
                tmp_dialogue['scenario'] = dialogue.scenario

            # Add to dialogue list
            dialogues.append(tmp_dialogue)

        # Add dataset name and dialogues to save data
        save_data['dataset'] = self.model.dataset
        save_data['num_dialogues'] = self.model.num_dialogues
        save_data['dialogues'] = dialogues

        # Save data to file
        utils.save_data(self.data_path, self.dialogue_file, save_data)
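For reference, the structure that save() assembles before handing it to utils.save_data looks like the sketch below. The field names come from the code above; the concrete values are made-up placeholders.

# Shape of the saved data, with illustrative placeholder values:
example_save_data = {
    'dataset': 'example_dataset',
    'num_dialogues': 1,
    'dialogues': [{
        'dialogue_id': 'example_dataset_1',
        'num_utterances': 1,
        'utterances': [{
            'speaker': 'A',
            'text': 'Hello there.',
            'ap_label': 'AP-Label',
            'da_label': 'DA-Label',
            # 'slots' appears only when the utterance defines them
        }],
        # 'scenario' appears only when the dialogue defines one
    }],
}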
Example 17
def serialize(python_object):
    if isinstance(python_object, dict):
        attributes = python_object.copy()
    elif isinstance(python_object, list):
        attributes = dict((str(index), item) for index, item in enumerate(python_object))
    else:        
        try:
            attributes = python_object.__reduce__()
        except (TypeError, AttributeError):
            attributes = python_object.__dict__.copy()

    sub_structs = []
    for attribute, value in attributes.items():
        if isinstance(value, (dict, tuple, list)):
            attributes[attribute] = serialize(value)
            print "Serialized nested object: ", len(attributes[attribute])
            print
            print attributes[attribute]
            print
            sub_structs.append(attribute)
        
    attribute_types = dict((key, type(value)) for key, value in attributes.items())
    struct_type = new_struct_type(python_object.__class__.__name__, **attribute_types)
    struct = struct_type(**attributes)
    
    for _sub in sub_structs:
        print "\n\n\nSubstructure info: ", len(getattr(struct, _sub)), getattr(struct, _sub)
    return utilities.save_data(sub_structs, pack_structure(struct))
Example 18
    def createTestingCorpus(self, df, name):
        """
			Constructs dataframe with test resutls

			Paras:
				None
			Return:
				None
		"""

        df["reviews"] = df["summary"] + ". " + df["reviewText"]
        for _, temp in df.iterrows():
            data = temp.overall + " " + temp.reviews + "\n"
            save_data("../Dataset/test_set/",
                      "test_{}.txt".format(name),
                      data,
                      mode="a")
Example 19
def test_dataraster_coloring():
    srs = '+init=epsg:32630'
    lyr = mapnik.Layer('dataraster')
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        lyr.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif',
            band=1,
        )
        lyr.srs = srs
        _map = mapnik.Map(256, 256, srs)
        style = mapnik.Style()
        rule = mapnik.Rule()
        sym = mapnik.RasterSymbolizer()
        # Assigning a colorizer to the RasterSymbolizer tells the latter
        # that it should use it to colorize the raw data raster
        sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                               mapnik.Color("transparent"))

        for value, color in [
            (0, "#0044cc"),
            (10, "#00cc00"),
            (20, "#ffff00"),
            (30, "#ff7f00"),
            (40, "#ff0000"),
            (50, "#ff007f"),
            (60, "#ff00ff"),
            (70, "#cc00cc"),
            (80, "#990099"),
            (90, "#660066"),
            (200, "transparent"),
        ]:
            sym.colorizer.add_stop(value, mapnik.Color(color))
        rule.symbols.append(sym)
        style.rules.append(rule)
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        _map.zoom_to_box(lyr.envelope())

        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)
        # save a png somewhere so we can see it
        save_data('test_dataraster_coloring.png', im.tostring('png'))
        imdata = im.tostring()
        # we have some values in the [20,30) interval so check that they're colored
        assert contains_word('\xff\xff\x00\xff', imdata)
Example 20
def test_single(f=None, do_plot=False, do_save=False, title=""):
    if f is None:
        if len(sys.argv) > 1:
            f = float(sys.argv[1])
        else:
            f = 0.4
    if len(sys.argv) > 2:
        max_entities = int(sys.argv[2])
    else:
        max_entities = None
    g = 0.5
    gap_cost = f
    mai = 1
    method = 'binB-LD'
    method = 'LD'
    #method = 'mKlau'
    #method = 'upProgmKlau'
    method = 'progmKlau'
    #method = 'isorankn'
    #method = 'rand'
    seed = np.random.randint(0, 1000000)
    seed = 45398
    pr, re, f1, o1 = single_cer(f,
                                g=g,
                                gap_cost=gap_cost,
                                seed=seed,
                                method=method,
                                n_input_graphs=2,
                                n_duplicates=30,
                                p_keep_edge=0.8,
                                density_multiplier=1.1,
                                n_entities=50,
                                n_input_graph_nodes=50,
                                max_iters=300,
                                max_algorithm_iterations=mai,
                                shuffle=False,
                                max_entities=max_entities)

    if do_save:
        util.save_data(locals(), "single_synthetic_" + title)

    if do_plot:
        plt.plot(o1['Zd_scores'], '-x')
        plt.plot(o1['feasible_scores'], '-o')
        plt.show()
Example 21
def test_dataraster_coloring():
    srs = '+init=epsg:32630'
    lyr = mapnik.Layer('dataraster')
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        lyr.datasource = mapnik.Gdal(
            file='../data/raster/dataraster.tif',
            band=1,
        )
        lyr.srs = srs
        _map = mapnik.Map(256, 256, srs)
        style = mapnik.Style()
        rule = mapnik.Rule()
        sym = mapnik.RasterSymbolizer()
        # Assigning a colorizer to the RasterSymbolizer tells the latter
        # that it should use it to colorize the raw data raster
        sym.colorizer = mapnik.RasterColorizer(mapnik.COLORIZER_DISCRETE,
                                               mapnik.Color("transparent"))

        for value, color in [
            (  0, "#0044cc"),
            ( 10, "#00cc00"),
            ( 20, "#ffff00"),
            ( 30, "#ff7f00"),
            ( 40, "#ff0000"),
            ( 50, "#ff007f"),
            ( 60, "#ff00ff"),
            ( 70, "#cc00cc"),
            ( 80, "#990099"),
            ( 90, "#660066"),
            ( 200, "transparent"),
        ]:
            sym.colorizer.add_stop(value, mapnik.Color(color))
        rule.symbols.append(sym)
        style.rules.append(rule)
        _map.append_style('foo', style)
        lyr.styles.append('foo')
        _map.layers.append(lyr)
        _map.zoom_to_box(lyr.envelope())

        im = mapnik.Image(_map.width, _map.height)
        mapnik.render(_map, im)
        # save a png somewhere so we can see it
        save_data('test_dataraster_coloring.png', im.tostring('png'))
        imdata = im.tostring()
        # we have some values in the [20,30) interval so check that they're colored
        assert contains_word('\xff\xff\x00\xff', imdata)
Example 22
def test_renders_with_cairo():
    if not mapnik2.has_pycairo():
        return
    sym = mapnik2.GlyphSymbolizer("DejaVu Sans Condensed",
                                  mapnik2.Expression("'í'"))
    sym.allow_overlap = True
    sym.angle = mapnik2.Expression("[azimuth]+90") #+90 so the top of the glyph points upwards
    sym.size = mapnik2.Expression("[value]")
    sym.color = mapnik2.Expression("'#ff0000'")
    _map = create_map_and_append_symbolyzer(sym)

    from cStringIO import StringIO
    import cairo
    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 256, 256)
    mapnik2.render(_map, surface)
    im = mapnik2.Image.from_cairo(surface)
    save_data('cairo_glyph_symbolizer.png', im.tostring('png'))
    assert contains_word('\xff\x00\x00\xff', im.tostring())
Example 23
def test_renders_with_cairo():
    if not mapnik.has_pycairo():
        return
    sym = mapnik.GlyphSymbolizer("DejaVu Sans Condensed",
                                 mapnik.Expression("'í'"))
    sym.allow_overlap = True
    sym.angle = mapnik.Expression(
        "[azimuth]+90")  #+90 so the top of the glyph points upwards
    sym.size = mapnik.Expression("[value]")
    sym.color = mapnik.Expression("'#ff0000'")
    _map = create_map_and_append_symbolyzer(sym)
    if _map:
        from cStringIO import StringIO
        import cairo
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, 256, 256)
        mapnik.render(_map, surface)
        im = mapnik.Image.from_cairo(surface)
        save_data('cairo_glyph_symbolizer.png', im.tostring('png'))
        assert contains_word('\xff\x00\x00\xff', im.tostring())
Example 24
    def createTrainingCorpus(self, df, name):
        """
			Creates training data set with labels appended to beginging of each label

			Paras:
				df: datafframe
			Returns:
				None
		"""
        df = df.sample(frac=1).reset_index(drop=True)
        ratings = df["ratings"].tolist()
        reviews = df["reviews"].tolist()

        for rating, review in zip(ratings, reviews):
            doc = "__label__{}".format(rating) + " " + review.strip()
            doc = " ".join(
                [word for word in word_tokenize(doc) if len(word) > 1])
            save_data("../Dataset/training_processed",
                      "{}.txt".format(name),
                      doc + "\n",
                      mode="a")
Example 25
def test_raster_with_alpha_blends_correctly_with_background():
    WIDTH = 500
    HEIGHT = 500

    map = mapnik.Map(WIDTH, HEIGHT)
    WHITE = mapnik.Color(255, 255, 255)
    map.background = WHITE

    style = mapnik.Style()
    rule = mapnik.Rule()
    symbolizer = mapnik.RasterSymbolizer()
    #XXX: This fixes it, see http://trac.mapnik.org/ticket/759#comment:3
    #     (and remove comment when this test passes)
    #symbolizer.scaling="bilinear_old"

    rule.symbols.append(symbolizer)
    style.rules.append(rule)

    map.append_style('raster_style', style)

    map_layer = mapnik.Layer('test_layer')
    filepath = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        map_layer.datasource = mapnik.Gdal(file=filepath)
        map_layer.styles.append('raster_style')
        map.layers.append(map_layer)

        map.zoom_all()

        mim = mapnik.Image(WIDTH, HEIGHT)

        mapnik.render(map, mim)
        save_data(
            'test_raster_with_alpha_blends_correctly_with_background.png',
            mim.tostring('png'))
        imdata = mim.tostring()
        # All white is expected
        assert contains_word('\xff\xff\xff\xff', imdata)
Example 26
def test_raster_with_alpha_blends_correctly_with_background():
    WIDTH = 500
    HEIGHT = 500

    map = mapnik.Map(WIDTH, HEIGHT)
    WHITE = mapnik.Color(255, 255, 255)
    map.background = WHITE

    style = mapnik.Style()
    rule = mapnik.Rule()
    symbolizer = mapnik.RasterSymbolizer()
    #XXX: This fixes it, see http://trac.mapnik.org/ticket/759#comment:3
    #     (and remove comment when this test passes)
    #symbolizer.scaling="bilinear_old"

    rule.symbols.append(symbolizer)
    style.rules.append(rule)

    map.append_style('raster_style', style)

    map_layer = mapnik.Layer('test_layer')
    filepath = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.instance().plugin_names():
        map_layer.datasource = mapnik.Gdal(file=filepath)
        map_layer.styles.append('raster_style')
        map.layers.append(map_layer)

        map.zoom_all()

        mim = mapnik.Image(WIDTH, HEIGHT)

        mapnik.render(map, mim)
        save_data('test_raster_with_alpha_blends_correctly_with_background.png',
                  mim.tostring('png'))
        imdata = mim.tostring()
        # All white is expected
        assert contains_word('\xff\xff\xff\xff', imdata)
Example 27
def test_raster_with_alpha_blends_correctly_with_background():
    WIDTH = 500
    HEIGHT = 500

    map = mapnik.Map(WIDTH, HEIGHT)
    WHITE = mapnik.Color(255, 255, 255)
    map.background = WHITE

    style = mapnik.Style()
    rule = mapnik.Rule()
    symbolizer = mapnik.RasterSymbolizer()
    symbolizer.scaling = mapnik.scaling_method.BILINEAR

    rule.symbols.append(symbolizer)
    style.rules.append(rule)

    map.append_style('raster_style', style)

    map_layer = mapnik.Layer('test_layer')
    filepath = '../data/raster/white-alpha.png'
    if 'gdal' in mapnik.DatasourceCache.plugin_names():
        map_layer.datasource = mapnik.Gdal(file=filepath)
        map_layer.styles.append('raster_style')
        map.layers.append(map_layer)

        map.zoom_all()

        mim = mapnik.Image(WIDTH, HEIGHT)

        mapnik.render(map, mim)
        save_data(
            'test_raster_with_alpha_blends_correctly_with_background.png',
            mim.tostring('png'))
        imdata = mim.tostring()
        # All white is expected
        assert contains_word('\xff\xff\xff\xff', imdata)
Example 28
def test_single(f=None, do_plot=False, do_save=False, title=""):
    if f is None:
        if len(sys.argv) > 1:
            f = float(sys.argv[1])
        else:
            f = 0.4
    if len(sys.argv) > 2:
        max_entities = int(sys.argv[2])
    else:
        max_entities = None
    g = 0.5
    gap_cost = f
    mai = 1
    method = 'binB-LD'
    method = 'LD'
    #method = 'mKlau'
    #method = 'upProgmKlau'
    method = 'progmKlau'
    #method = 'isorankn'
    #method = 'rand'
    seed = np.random.randint(0, 1000000)
    seed = 45398
    pr, re, f1, o1 = single_cer(
        f, g=g, gap_cost=gap_cost, seed=seed, method=method,
        n_input_graphs=2, n_duplicates=30, p_keep_edge=0.8,
        density_multiplier=1.1, n_entities=50, n_input_graph_nodes=50,
        max_iters=300, max_algorithm_iterations=mai, shuffle=False,
        max_entities=max_entities)

    if do_save:
        util.save_data(locals(), "single_synthetic_" + title)

    if do_plot:
        plt.plot(o1['Zd_scores'], '-x')
        plt.plot(o1['feasible_scores'], '-o')
        plt.show()
Example 29
def get_structure_bytestream(structure):
    format_string = ''
    fields_format = []
    values = []
    _values = [] # do nested structs in a second pass afterwards
    for attribute, _type in structure._fields_:
        if _type == ctypes.c_char_p:
            character = str(len(getattr(structure, attribute))) + 's'
        else:
            try:
                character = format_character[_type]
            except KeyError:
                if issubclass(_type, ctypes.Structure):
                    _values.append((attribute, _type))
                    continue
                else:
                    raise
        format_string += character
        fields_format.append((attribute, character))
        value = getattr(structure, attribute)
        if value is None:
            value = 0
        values.append(value)  
    
    # this is a potentially more readable form of the code that follows
    #packed_data = utilities.save_data(format_string) + struct.pack(format_string, *values)
    #for attribute, _type in _values:
    #    packed_data += get_structure_bytestream(getattr(structure, attribute))
    #return packed_data

    name = "{}_{}".format(type(structure).__name__, len(structure._fields_))
  # print "Packing values: ", ([name, fields_format, struct.pack(format_string, *values)] + 
  #                            [get_structure_bytestream(getattr(structure, attribute)) for 
  #                             attribute, _type in _values])
    return utilities.save_data(*[name, fields_format, struct.pack(format_string, *values)] + 
                                [(attribute, get_structure_bytestream(getattr(structure, attribute))) for 
                                 attribute, _type in _values])
Example 30
        start_counter += new_headers_info[index]['length']
    return new_headers_info


def insert_new_field_to_data(new_field_info, data):
    output = ""
    field_name = new_field_info['name']
    start = new_field_info['start']
    length = new_field_info['length'] + 1
    for line in data.split('\n'):
        if is_header_line(line):
            output += line[:start] + field_name.ljust(length) + line[start:]
        elif line == '' or line[0] == ';':
            output += line
        else:
            output += line[:start] + (' ' * length) + line[start:]
        output += '\n'
    return output


if __name__ == "__main__":
    tablatal_data = load_data(args.input)
    header_line = find_header_line(tablatal_data)
    headers_info = get_headers_info_from_line(header_line)
    headers_info[-1]['length'] = get_last_field_length(tablatal_data, headers_info)
    new_field_details = get_new_field_info(headers_info)
    new_headers_info = add_new_field_to_headers_info(new_field_details, headers_info)
    new_field_info = new_headers_info[new_field_details['index']]
    tablatal_data = insert_new_field_to_data(new_field_info, tablatal_data)
    save_data(tablatal_data, args.output)
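For orientation, insert_new_field_to_data splices a fixed-width column into every line: header lines receive the new field name left-justified to the column width, ordinary data lines receive the same number of spaces, and blank or comment (';') lines pass through unchanged. A schematic with made-up field names and widths:

# Header line before:  NAME    AGE
# Header line after:   NAME    CITY     AGE
# Data line before:    alice   31
# Data line after:     alice            31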
Example 31
def main():
    parser = argparse.ArgumentParser(usage='sorry, look at readme...', \
            description='arg description', epilog='end')
    parser.add_argument('inputF',
                        help='write the file name of the input text.')
    parser.add_argument('-model',
                        help='select Freq or PPMI.',
                        default='PPMI',
                        choices=['Freq', 'PPMI'])
    parser.add_argument('-outF',
                        help='write the output file name.',
                        default='sample')
    parser.add_argument('-window',
                        help='define the window size.',
                        type=int,
                        default=2)
    parser.add_argument('-iter',
                        help='the number of HITS iteration.',
                        type=int,
                        default=300)
    parser.add_argument('-vocabSize',
                        help='define the vocabulary size. default is all.',
                        type=int,
                        default=None)
    args = parser.parse_args()

    # counting co-occurrence
    util.trace('count the co-occurrence')
    co_occur, word_occur, context_occur = word_graph.extract_context(
        args.inputF, args.window)

    util.trace('vocabulary size of the input data is {}.'.format(
        len(word_occur)))
    if args.vocabSize:
        vocabSize = args.vocabSize
    else:
        vocabSize = len(word_occur)

    # calculate matrix
    util.trace('make matrix (word-graph)')
    matrix, vec = word_graph.make_matrix(co_occur, word_occur, context_occur,
                                         args.model)

    # save data (matrix)
    util.trace('save the matrix')
    util.save_data(matrix,
                   args.outF + '/pmi_matrix_{}.pickle'.format(args.model))
    util.save_data(vec,
                   args.outF + '/pmi_vectorizer_{}.pickle'.format(args.model))

    # get the initial vector
    HITS_obj = hits.HITS(matrix)

    # matrix is symmetric; authority score is equal to hubness score.
    util.trace('start HITS')
    i = HITS_obj.startHITS(args.iter).toarray()
    util.trace('finish HITS')

    # write the ranking words by HITS
    util.trace('write the vocabulary')
    util.writeVocab(HITS_obj, i, vocabSize, args.outF + '/vocab_file.hits')

    util.trace('finish program')
Example 32
def main_menu(task_lists: dict[str, list], current_list: str,
              arguments: Namespace):
    verbose = False
    active_tasks = task_lists.get(current_list)
    while True:
        task_lists[current_list] = active_tasks
        data = format_all_tasks_to_plaintext(task_lists, current_list)
        save_data(data, os.getenv('TOD_FP'))

        print_all_tasks(current_list, active_tasks, verbose)
        raw_command = input('► ')

        cls()
        parsed_command = re.match(r'([A-Za-z]*)(\d+)?:?(\d+)?',
                                  raw_command).groups()
        command, selected_number, dest_number = parsed_command
        if selected_number:
            selected_number = int(selected_number)
        if dest_number:
            dest_number = int(dest_number)
        number_of_tasks = len(active_tasks)
        if selected_number is not None \
                and selected_number >= number_of_tasks \
                and command != 'a':
            print(C.RED + "No such task.\n" + C.NORMAL)
            continue

        cls()

        if raw_command == '':
            show_help()
        elif not command and selected_number is not None:
            task = active_tasks[selected_number]
            time_spent_in_seconds = spend_time_on_task(task.get('name'),
                                                       task.get('notes'),
                                                       arguments.pomodoro)
            prev_time_spent_in_seconds = convert_time_spent_to_seconds(
                task.get('time_spent'))
            total_time_spent = prev_time_spent_in_seconds + time_spent_in_seconds
            formatted_time_spent = format_seconds_to_time_spent(
                total_time_spent)
            updated_task = {**task, 'time_spent': formatted_time_spent}
            tasks.update(active_tasks, updated_task, selected_number)
            print(C.PURPLE + 'Elapsed time added.' + C.NORMAL)
        elif command == 'aa':
            cls()
            while True:
                task_name, task_notes = task_name_input()
                if not task_name:
                    break
                new_task = {
                    'name': task_name,
                    'time_spent': '0:00',
                    'notes': task_notes,
                    'completed': False
                }
                active_tasks = tasks.add(active_tasks, new_task,
                                         selected_number)
            cls()
            print(C.PURPLE + 'Tasks added.' + C.NORMAL)
        elif command == 'al':
            cls()
            new_list_name = list_name_input()
            cls()
            if not new_list_name:
                print(C.RED + 'No name entered.' + C.NORMAL)
                continue
            task_lists[new_list_name] = list()
            current_list = new_list_name
            active_tasks = task_lists[current_list]
            print(C.PURPLE + 'List created.' + C.NORMAL)
        elif command == 'a':
            task_name, task_notes = task_name_input()
            cls()
            if not task_name:
                print(C.RED + 'Cannot add empty task.' + C.NORMAL)
                continue
            new_task = {
                'name': task_name,
                'time_spent': '0:00',
                'notes': task_notes,
                'completed': False
            }
            active_tasks = tasks.add(active_tasks, new_task, selected_number)
            print(C.PURPLE + 'Task added.' + C.NORMAL)
        elif command == 'b':
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            timestamp_before = int(time.time())
            current_number = selected_number
            current_task = active_tasks[selected_number]
            print(C.YELLOW + current_task.get('name') + C.NORMAL + '\n')
            if current_task.get('notes'):
                print(C.GRAY + current_task.get('notes') + C.NORMAL + '\n')
            print('Enter your new broken down tasks:\n')
            while True:
                current_number += 1
                task_name, task_notes = task_name_input()
                if not task_name:
                    break
                new_task = {
                    'name': task_name,
                    'time_spent': '0:00',
                    'notes': task_notes,
                    'completed': False
                }
                active_tasks = tasks.add(active_tasks, new_task,
                                         current_number)
            timestamp_after = int(time.time())
            time_spent_in_seconds = timestamp_after - timestamp_before
            prev_time_spent_in_seconds = convert_time_spent_to_seconds(
                current_task.get('time_spent'))
            total_time_spent = prev_time_spent_in_seconds + time_spent_in_seconds
            formatted_time_spent = format_seconds_to_time_spent(
                total_time_spent)
            updated_task = {**current_task, 'time_spent': formatted_time_spent}
            tasks.update(active_tasks, updated_task, selected_number)
            cls()
            print(C.PURPLE + 'Tasks added.' + C.NORMAL)
        elif command == 'c':
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            cls()
            if selected_number is not None:
                active_tasks = tasks.set_completion(active_tasks,
                                                    selected_number)
                print(C.PURPLE + 'Task updated.' + C.NORMAL)
        elif command == 'dd':
            active_tasks = []
            print(C.PURPLE + 'Tasks deleted.' + C.NORMAL)
        elif command == 'dl':
            list_names = list(task_lists.keys())
            print_all_lists(list_names)
            selected_number = list_number_input(len(list_names))
            cls()
            selected_list = list_names[selected_number]
            del task_lists[selected_list]
            if selected_list == current_list:
                current_list = list_names[0]
            print(C.PURPLE + 'List deleted.' + C.NORMAL)
        elif command == 'd':
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            cls()
            active_tasks = tasks.delete(active_tasks, selected_number)
            print(C.PURPLE + 'Task deleted.' + C.NORMAL)
        elif command == 'e':
            if number_of_tasks == 0:
                print(C.PURPLE + 'No tasks to edit.' + C.NORMAL)
                continue
            if selected_number is None:
                print_all_tasks(current_list, active_tasks)
                selected_number = task_number_input(number_of_tasks)
                cls()
            task = active_tasks[selected_number]
            print('\n' + C.BLUE + "Original Task:" + C.NORMAL)
            name = task['name']
            notes = ': ' + task.get('notes') if task.get('notes') else ''
            time_spent = task['time_spent']
            print(f"\n{name}{notes}\n({time_spent})\n")
            updated_task_name, updated_task_notes = task_name_input(
                name, task['notes'])
            updated_time_spent = task_time_input(time_spent)
            cls()
            updated_task = {
                **task, 'name': updated_task_name,
                'notes': updated_task_notes,
                'time_spent': updated_time_spent
            }
            active_tasks = tasks.update(active_tasks, updated_task,
                                        selected_number)
            print(C.PURPLE + 'Task updated.' + C.NORMAL)
        elif command == 'h':
            show_help()
        elif command == 'l':
            print_all_lists(task_lists)
            list_names = task_lists.keys()
            selected_number = list_number_input(len(list_names))
            cls()
            current_list = list(list_names)[selected_number]
            active_tasks = task_lists.get(current_list)
            print(C.PURPLE + 'List selected.' + C.NORMAL)
        elif command == 'ml':
            if number_of_tasks == 0:
                print(C.PURPLE + 'No tasks to move.' + C.NORMAL)
                continue
            print_all_tasks(current_list, active_tasks)
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            cls()
            list_names = list(task_lists.keys())
            print_all_lists(list_names)
            destination_list_number = int(
                input(f'Move task {selected_number} to which list? '))
            destination_list = task_lists[list_names[destination_list_number]]
            cls()
            active_tasks, destination_list = tasks.move_to_list(
                active_tasks, destination_list, selected_number)
            task_lists[list_names[destination_list_number]] = destination_list
            print(C.PURPLE + 'Task moved.' + C.NORMAL)
        elif command == 'm':
            if number_of_tasks == 0:
                print(C.PURPLE + 'No tasks to move.' + C.NORMAL)
                continue
            print_all_tasks(current_list, active_tasks)
            if selected_number is None:
                selected_number = task_number_input(number_of_tasks)
            if dest_number is None:
                dest_number = input(f'Move task {selected_number} to where? ')
            if not str(dest_number).isdigit():
                cls()
                print(C.RED + 'Invalid number.' + C.NORMAL)
                continue
            cls()
            dest_number = int(dest_number)
            active_tasks = tasks.move(active_tasks, selected_number,
                                      dest_number)
            print(C.PURPLE + 'Tasks updated.' + C.NORMAL)
        elif command == 'n':
            verbose = not verbose
            message = 'Notes are now fully visible.' if verbose else 'Notes are now truncated.'
            print(C.PURPLE + message + C.NORMAL)
        elif command == 'q':
            sys.exit()
        elif command == 'r':
            active_tasks = tasks.reduce(active_tasks)
            print(C.PURPLE + 'Tasks reduced.' + C.NORMAL)
        elif command == 's':
            print('Starting new task list...\n')
            active_tasks = start_new_task_list()
            cls()
        elif command == 't':
            spend_time_on_task('Timer', None)
            cls()
        else:
            print(C.WHITE + "Try 'help' for more information." + C.NORMAL)
Example 33
def experiment_multiple_trees(n_reps=1,
                              n_trees=5,
                              n_people=500,
                              methods=('unary', 'LD', 'mKlau'),
                              top_k_matches=5,
                              f_vals=(0.1, 0.5, 1, 1.5, 2),
                              title='genealogical',
                              do_save=True,
                              dir_id=None,
                              rep_offset=0):
    nvv = len(f_vals)
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))
    res_ub = np.zeros((len(methods), nvv, n_reps))
    t_beg = time.time()

    start_date_part = str(dt.datetime.now())[:19]
    start_date_part = re.sub(' ', '_', start_date_part)
    start_date_part = re.sub(':', '', start_date_part)
    fname0 = os.path.join("experiment_results",
                          "{}_part_{}.pckl".format(title, start_date_part))

    for r in range(n_reps):
        print "\n--- Repetition {}. ---".format(r + 1)
        # Generate data
        tree_files = extract_ft.get_k_fragments(
            n_trees, n_people, label="first{}".format(r + rep_offset))
        people_index_tuples = []
        for tf in tree_files:
            people, people_dict = person.read_people(tf, clean=True)
            #'family_trees/data/rand_frag_%d/' % i, clean=True)
            index = create_index(people)
            people_index_tuples.append((people, index, people_dict))
        uniq_people = count_unique_people(tree_files)

        for i, f in enumerate(f_vals):
            print "\n  rep={}, f={}".format(r + 1, f)
            for mi, m in enumerate(methods):
                if m.startswith('meLD') and i > 0:
                    # No need to compute fixed entity method for different f values.
                    continue
                print "\n    rep={}, f={}, method={}\n".format(r + 1, f, m)
                t0 = time.time()
                precision, recall, fscore, n_clusters, lb, ub, iters = \
                    merge_multiple(people_index_tuples, 10, top_k_matches,
                                   method=m, uniq_people=uniq_people, f=f)
                res_precision[mi, i, r] = precision
                res_recall[mi, i, r] = recall
                res_fscore[mi, i, r] = fscore
                res_clusters[mi, i, r] = n_clusters
                res_t[mi, i, r] = time.time() - t0
                res_iterations[mi, i, r] = iters
                res_lb[mi, i, r] = lb
                res_ub[mi, i, r] = ub
        if do_save and n_reps > 1:
            pickle.dump(locals(), open(fname0, 'wb'))
            print "Wrote the results of repetition {} to: {}\n".format(
                r + 1, fname0)

    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)

    if do_save:
        fname = util.save_data(locals(),
                               title,
                               dir_name='genealogy{}'.format(str(dir_id)))
        print "Wrote the results to: {}".format(fname)

    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
Example 34
def multiplex_experiment(n_reps=10, title='multiplex', do_save=True,
                         dir_id=None):
    """
    Run an experiment on aligning the (anonymized) layers of a multiplex
    graph.

    Input:
        n_reps -- number of repetitions per setting
    Output:
        Prints some statistics and stores the results to a file.
    """
    shuffle = True
    methods = ('ICM', 'progmKlau', 'upProgmKlau', 'mKlau', 'LD', 'binB-LD5',
               'meLD5_50', 'meLD5_61', 'meLD5_70', 'isorankn', 'LD5')
    g = 0.5
    max_iters = 300
    duplicate_names = 3
    f_values = [0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 4, 5]
    nvv = len(f_values)

    fname = os.path.join('multiplex', 'CS-Aarhus_multiplex.edges')

    experiment_seed = np.random.randint(0, 1000000)
    print "--- Experiment seed: {} ---\n".format(experiment_seed)
    random.seed(experiment_seed)
    np.random.seed(experiment_seed)

    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_costs = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))  # Lower bounds
    res_ub = np.zeros((len(methods), nvv, n_reps))  # Upper bound

    t_beg = time.time()
    date0 = dt.datetime.now()
    for r in range(n_reps):
        print "\n  Repetition: {}".format(r)
        Gs = read_multiplex_data(fname, n_duplicate_names=duplicate_names)
        for i, f in enumerate(f_values):
            print "\nf={}.\n".format(f)
            cost_params = {'f': f, 'g': g, 'gap_cost': f}
            for j, method in enumerate(methods):
                print "\n  method={}, f={}, rep={}".format(method, f, r)
                max_entities = None
                mai = 1
                if method.startswith('LD') and len(method) > 2:
                    mai = int(method[2:])
                    method = 'LD'
                elif method.startswith('binB-LD') and len(method) > 7:
                    mai = int(method[7:])
                    method = 'binB-LD'
                elif method.startswith('meLD'):
                    if i > 0:
                        # No need to compute fixed entity method for different f values.
                        continue
                    parts = method.split('_')
                    if len(parts[0]) > 4:
                        mai = int(parts[0][4:])
                    max_entities = int(parts[1])
                    method = 'binB-LD'
                t0 = time.time()
                x, o = align_multiple_networks(
                    Gs, cost_params, method=method, max_iters=max_iters,
                    max_algorithm_iterations=mai, max_entities=max_entities,
                    shuffle=shuffle)
                print "Optimization took {:.2f} seconds.".format(time.time() -
                                                                 t0)
                pr, rec, f1 = o['scores']

                res_t[j, i, r] = time.time() - t0
                res_precision[j, i, r] = pr
                res_recall[j, i, r] = rec
                res_fscore[j, i, r] = f1
                res_iterations[j, i, r] = o['iterations']
                res_clusters[j, i, r] = o['n_clusters']
                res_costs[j, i, r] = o['cost']
                res_lb[j, i, r] = o['lb']
                res_ub[j, i, r] = o['ub']
        if do_save and n_reps > 1:
            fname0 = util.save_data(locals(), "multiplex", date0)
            print "Wrote the results of repetition {} to: {}\n".format(r+1, fname0)
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)
    if do_save:
        fname = util.save_data(locals(),
                               title,
                               dir_name='multiplex{}'.format(dir_id))
        print "Wrote the results to: {}".format(fname)
    #plot_toy_experiment_results(fname)

    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Iterations:", np.mean(res_iterations, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Costs:", np.mean(res_costs, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
Example no. 35
0
def experiment_template(
        n_reps, params, varied_param, cv=False,
        methods=('ICM', 'progmKlau', 'upProgmKlau', 'mKlau', 'LD', 'LD5'),
        title='generic', e_seed=None, dir_id=1000):
    """
    General template for performing experiments.
    
    Input:
        n_reps -- number of repetitions per setting
        params -- all parameters (the parameter to be varied should be a list)
        varied_param -- the name of the parameter to be varied
        cv -- whether to find f and gap_cost through cross-validation

    Output:
        Prints some statistics and stores the results to a file.
    """
    shuffle = True

    if e_seed is None:
        experiment_seed = np.random.randint(0, 1000000)
    else:
        experiment_seed = e_seed
    # experiment_seed = 48574 # Gt yields a better optimum
    print "--- Experiment seed: {} ---\n".format(experiment_seed)
    random.seed(experiment_seed)
    np.random.seed(experiment_seed)

    varied_values = params[varied_param]
    nvv = len(varied_values)
    p = dict(params)  # Current values

    if 'max_entities' not in p:
        p['max_entities'] = None

    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_costs = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))  # Lower bounds
    res_ub = np.zeros((len(methods), nvv, n_reps))  # Upper bounds

    seeds = []
    for r in range(n_reps):
        seeds.append(np.random.randint(0, 1000000))

    t_beg = time.time()
    date_beg = dt.datetime.now()
    for i, val in enumerate(varied_values):
        p[varied_param] = val
        if varied_param == 'f':
            p['gap_cost'] = val
        print "\n{} {}.\n".format(val, varied_param)
        if cv:
            optimal_params = cross_validate_params(
                'LD', p['n_input_graphs'], p['n_entities'],
                p['n_input_graph_nodes'], p['p_keep_edge'],
                p['density_multiplier'], p['duplicates'], p['max_iters'], 1)
            p['f'] = optimal_params['f']
            optimal_params = cross_validate_params(
                'mKlau', p['n_input_graphs'], p['n_entities'],
                p['n_input_graph_nodes'], p['p_keep_edge'],
                p['density_multiplier'], p['duplicates'], p['max_iters'], 1)
            p['gap_cost'] = optimal_params['gap_cost']
        for r in range(n_reps):
            print "\n  Repetition: {}".format(r)
            seed = seeds[r]
            for j, method in enumerate(methods):
                print "\n  Method: {}\n".format(method)
                max_entities = None
                mai = 1
                if method.startswith('LD') and len(method) > 2:
                    mai = int(method[2:])
                    method = 'LD'
                elif method.startswith('binB-LD') and len(method) > 7:
                    mai = int(method[7:])
                    method = 'binB-LD'
                elif method.startswith('meLD'):
                    if i > 0:
                        # No need to recompute the fixed-entity method for
                        # each value of the varied parameter.
                        continue
                    parts = method.split('_')
                    if len(parts[0]) > 4:
                        mai = int(parts[0][4:])
                    max_entities = int(parts[1])
                    method = 'binB-LD'
                t0 = time.time()
                pr, rec, f1, o = single_cer(
                    p['f'], p['g'], p['gap_cost'], seed, method,
                    p['n_input_graphs'], p['n_entities'],
                    p['n_input_graph_nodes'], p['p_keep_edge'],
                    p['density_multiplier'], p['duplicates'], p['max_iters'],
                    mai, shuffle, max_entities)
                res_t[j, i, r] = time.time() - t0
                res_precision[j, i, r] = pr
                res_recall[j, i, r] = rec
                res_fscore[j, i, r] = f1
                res_iterations[j, i, r] = o['iterations']
                res_clusters[j, i, r] = o['n_clusters']
                res_costs[j, i, r] = o['cost']
                res_lb[j, i, r] = o['lb']
                res_ub[j, i, r] = o['ub']
    print "\nThe whole experiment took {:2f} seconds.".format(time.time() -
                                                              t_beg)

    fname = util.save_data(locals(), "synthetic_" + title, dir_name='multiplex{}'.format(str(dir_id)))
    #plot_toy_experiment_results(fname)

    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Iterations:", np.mean(res_iterations, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Costs:", np.mean(res_costs, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
Example no. 38
0
def experiment_multiple_trees(n_reps=1, n_trees=5, n_people=500,
                              methods=('unary', 'LD', 'mKlau'),
                              top_k_matches=5, f_vals=(0.1, 0.5, 1, 1.5, 2),
                              title='genealogical', do_save=True, dir_id=None,
                              rep_offset=0):
    nvv = len(f_vals)
    res_precision = np.zeros((len(methods), nvv, n_reps))
    res_recall = np.zeros((len(methods), nvv, n_reps))
    res_fscore = np.zeros((len(methods), nvv, n_reps))
    res_t = np.zeros((len(methods), nvv, n_reps))
    res_iterations = np.zeros((len(methods), nvv, n_reps))
    res_clusters = np.zeros((len(methods), nvv, n_reps))
    res_lb = np.zeros((len(methods), nvv, n_reps))
    res_ub = np.zeros((len(methods), nvv, n_reps))
    t_beg = time.time()

    start_date_part = str(dt.datetime.now())[:19]
    start_date_part = re.sub(' ', '_', start_date_part)
    start_date_part = re.sub(':', '', start_date_part)
    fname0 = os.path.join("experiment_results", "{}_part_{}.pckl".format(
        title, start_date_part))

    for r in range(n_reps):
        print "\n--- Repetition {}. ---".format(r+1)
        # Generate data
        tree_files = extract_ft.get_k_fragments(
                n_trees, n_people, label="first{}".format(r+rep_offset))
        people_index_tuples = []
        for tf in tree_files:
            people, people_dict = person.read_people(tf, clean=True)
            #'family_trees/data/rand_frag_%d/' % i, clean=True)
            index = create_index(people)
            people_index_tuples.append((people, index, people_dict))
        uniq_people = count_unique_people(tree_files)

        for i, f in enumerate(f_vals):
            print "\n  rep={}, f={}".format(r+1, f)
            for mi, m in enumerate(methods):
                if m.startswith('meLD') and i > 0:
                    # No need to compute fixed entity method for different f values.
                    continue
                print "\n    rep={}, f={}, method={}\n".format(r+1, f, m)
                t0 = time.time()
                precision, recall, fscore, n_clusters, lb, ub, iters = \
                    merge_multiple(people_index_tuples, 10, top_k_matches,
                                   method=m, uniq_people=uniq_people, f=f)
                res_precision[mi, i, r] = precision
                res_recall[mi, i, r] = recall
                res_fscore[mi, i, r] = fscore
                res_clusters[mi, i, r] = n_clusters
                res_t[mi, i, r] = time.time() - t0
                res_iterations[mi, i, r] = iters
                res_lb[mi, i, r] = lb
                res_ub[mi, i, r] = ub
        if do_save and n_reps > 1:
            pickle.dump(locals(), open(fname0, 'wb'))
            print "Wrote the results of repetition {} to: {}\n".format(r+1, fname0)

    print "\nThe whole experiment took {:2f} seconds.".format(time.time()-t_beg)

    if do_save:
        fname = util.save_data(locals(), title, dir_name='genealogy{}'.format(
            str(dir_id)))
        print "Wrote the results to: {}".format(fname)

    print "F1 score:", np.mean(res_fscore, axis=2)
    print "Precision:", np.mean(res_precision, axis=2)
    print "Recall:", np.mean(res_recall, axis=2)
    print "Time:", np.mean(res_t, axis=2)
    print "Clusters:", np.mean(res_clusters, axis=2)
    print "Lower bounds:", np.mean(res_lb, axis=2)
    print "Upper bounds:", np.mean(res_ub, axis=2)
Example no. 39
0
def get_field_lengths(data: list, headers: list) -> dict:
    field_data = {field: len(field) for field in headers}
    for entry in data:
        for key, value in entry.items():
            if value is not None and len(str(value)) > field_data[key]:
                field_data[key] = len(str(value))
    return field_data


def format_parsed_data(data: list, headers: list, lengths: dict) -> str:
    entries = []
    header_line = ""
    for header in headers:
        header_line += header.upper().ljust(lengths[header]) + ' '
    entries.append(header_line.strip())
    for entry in data:
        line = ""
        for header in headers:
            line += (str(entry[header]).ljust(lengths[header]) + ' '
                     if entry[header] is not None
                     else ' ' * lengths[header] + ' ')
        entries.append(line.strip())
    return '\n'.join(entries)
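
For context, a hypothetical round trip through these two helpers (the sample rows are made up):

def _demo():
    headers = ['name', 'status']
    data = [{'name': 'alpha', 'status': 'ok'},
            {'name': 'beta', 'status': None}]
    lengths = get_field_lengths(data, headers)  # {'name': 5, 'status': 6}
    print(format_parsed_data(data, headers, lengths))
    # NAME  STATUS
    # alpha ok
    # beta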


if __name__ == "__main__":
    parsed_data = parse_json_file(args.input)
    tbtl_data = create_tbtl_data(parsed_data)
    save_data(tbtl_data, args.output)