Beispiel #1
0
    def POST(self, userurlkey=None):
        """Store the URL from the request body and return its key as JSON.

        Args:
            userurlkey: Optional caller-supplied key. When given, its length
                must lie between ``config.MIN_LEN_USERURLKEY`` and
                ``config.MAX_LEN_URLKEY``; it is then encoded with
                ``utils.encode_string`` and lower-cased.

        Returns:
            A short plain-text message (with the status set to
            ``400 Bad request``) when validation fails, the result of
            ``web.internalerror`` when ``model.url_new`` reports a non-zero
            code, otherwise a JSON object with ``is_created`` and ``key``.
        """
        url = web.data()
        if len(url) > config.MAX_LEN_URL:
            web.ctx.status = "400 Bad request"
            return "url too long"

        if userurlkey is not None:
            if len(userurlkey) < config.MIN_LEN_USERURLKEY:
                web.ctx.status = "400 Bad request"
                return "key too short"
            if len(userurlkey) > config.MAX_LEN_URLKEY:
                web.ctx.status = "400 Bad request"
                return "key too long"
            userurlkey = str.lower(utils.encode_string(userurlkey))

        url = utils.encode_string(url)
        # Compared against False explicitly (rather than `not ...`) so that
        # any non-boolean return value is treated exactly as before.
        if False == utils.check_url(url):
            web.ctx.status = "400 Bad request"
            return "bad url"

        ret, n_affected, urlkey = model.url_new(url, userurlkey)
        if ret != 0:
            return web.internalerror("db error")

        retval = {
            # True only when exactly one row was affected by the call.
            "is_created": n_affected == 1,
            "key": urlkey,
        }
        web.ctx.status = "200 OK"
        return json.dumps(retval)
    def object_create(self, res_type, obj_id, obj_dict):
        """Write a new object row and register it in the fq-name table.

        Args:
            res_type: Resource type name; dashes are replaced by underscores
                to form the object type used for lookups and storage.
            obj_id: Row key under which the object is stored.
            obj_dict: Object description. ``fq_name`` is required;
                ``parent_type``, property fields and ``*_refs`` lists are
                read when present. An ``id_perms`` value is updated in place
                with ``created`` / ``last_modified`` timestamps.

        Returns:
            The tuple ``(True, '')``.
        """
        obj_type = res_type.replace('-', '_')
        obj_class = self._get_resource_class(obj_type)

        # Every write against the uuid column family is queued on this batch
        # and sent in one go further down.
        batch = self._obj_uuid_cf.batch()

        fq_name = obj_dict['fq_name']
        row_cols = {
            'fq_name': json.dumps(fq_name),
            'type': json.dumps(obj_type),
        }

        if 'parent_type' in obj_dict:
            # Not a config-root child: link the object under its parent,
            # whose fq_name is this object's fq_name minus the last element.
            parent_type = obj_dict['parent_type']
            parent_method_type = parent_type.replace('-', '_')
            parent_fq_name = fq_name[:-1]
            row_cols['parent_type'] = json.dumps(parent_type)
            parent_uuid = self.fq_name_to_uuid(parent_method_type,
                                               parent_fq_name)
            self._create_child(batch, parent_method_type, parent_uuid,
                               obj_type, obj_id)

        # Properties: only those actually present (not None) are written.
        for prop_name in obj_class.prop_fields:
            prop_value = obj_dict.get(prop_name)
            if prop_value is not None:
                if prop_name == 'id_perms':
                    created_at = datetime.datetime.utcnow().isoformat()
                    prop_value['created'] = created_at
                    prop_value['last_modified'] = created_at
                self._create_prop(batch, obj_id, prop_name, prop_value)

        # References, e.g. ref_field 'network_ipam_refs' has ref_type
        # 'network-ipam'; each one is stored with is_weakref False.
        for ref_field in obj_class.ref_fields:
            ref_type, _link_type, _ = obj_class.ref_field_types[ref_field]
            for ref in obj_dict.get(ref_field, []):
                ref_uuid = self.fq_name_to_uuid(ref_type, ref['to'])
                ref_data = {'attr': ref.get('attr'), 'is_weakref': False}
                self._create_ref(batch, obj_type, obj_id,
                                 ref_type.replace('-', '_'), ref_uuid,
                                 ref_data)

        batch.insert(obj_id, row_cols)
        batch.send()

        # fq-name table: one column named "<encoded fq_name>:<obj_id>".
        encoded_fq_name = utils.encode_string(':'.join(fq_name))
        self._obj_fq_name_cf.insert(
            obj_type, {encoded_fq_name + ':' + obj_id: json.dumps(None)})

        return (True, '')
Beispiel #3
0
def read_character_information(html):
    """Collect the "Character Information" rows into a dict.

    The node four levels above the text matching "Character Information" is
    iterated; each child's text is split at its first ':' into a label and a
    value. The label is passed through ``camelize`` and used as the key.

    Children whose text contains no ':' are skipped. Previously such rows
    produced a bogus entry (label with its last character cut off, mapped to
    the whole text) because ``str.find`` returned -1.

    Args:
        html: Parsed document offering ``find(text=...)``.

    Returns:
        dict mapping camelized labels to the text following the first ':'.
    """
    response_character_data = {}
    pattern = re.compile(r'Character Information')
    character_data = html.find(text=pattern).parent.parent.parent.parent
    for data in character_data:
        text = encode_string(data.get_text())
        label, separator, value = text.partition(':')
        if not separator:
            # No "label: value" pair on this row; nothing to record.
            continue
        response_character_data[camelize(label)] = value
    return response_character_data
    def fq_name_to_uuid(self, obj_type, fq_name):
        """Return the uuid recorded for ``fq_name`` in the fq-name table.

        Columns are named "<encoded fq_name>:<uuid>", so the lookup scans the
        column range from "<encoded>:" up to "<encoded>;" and takes the part
        after the last ':' of the last column returned.

        Raises:
            NoIdError: if the lookup raises ``pycassa.NotFoundException`` or
                yields no columns.
        """
        method_name = obj_type.replace("-", "_")
        encoded_name = utils.encode_string(":".join(fq_name))
        try:
            columns = self._obj_fq_name_cf.xget(
                method_name,
                column_start="%s:" % (encoded_name),
                column_finish="%s;" % (encoded_name))
        except pycassa.NotFoundException:
            raise NoIdError("%s %s" % (obj_type, fq_name))

        matches = list(columns)
        if not matches:
            raise NoIdError("%s %s" % (obj_type, fq_name))

        # Only the last matching column decides the result.
        last_col_name, _ = matches[-1]
        return last_col_name.split(":")[-1]
    def _object_create(self, res_type, obj_ids, obj_dict):
        """Write a new object row and register it in the fq-name table.

        Args:
            res_type: Resource type name; dashes are replaced by underscores
                to form the object type.
            obj_ids: Mapping whose ``'uuid'`` entry is the row key of the new
                object.
            obj_dict: Object description. ``fq_name`` is required;
                ``parent_type``, property fields and ``*_refs`` lists are
                read when present. An ``id_perms`` value is updated in place
                with ``created`` / ``last_modified`` timestamps.

        Returns:
            The tuple ``(True, '')``.
        """
        obj_type = res_type.replace('-', '_')
        obj_class = self._get_resource_class(obj_type)

        # Writes to the uuid column family are collected here and sent once.
        batch = self._obj_uuid_cf.batch()
        obj_uuid = obj_ids['uuid']

        fq_name = obj_dict['fq_name']
        row_cols = {
            'fq_name': json.dumps(fq_name),
            'type': json.dumps(obj_type),
        }

        if 'parent_type' in obj_dict:
            # Not a config-root child: the parent's fq_name is this object's
            # fq_name without its final element.
            parent_type = obj_dict['parent_type']
            parent_method_type = parent_type.replace('-', '_')
            parent_fq_name = fq_name[:-1]
            row_cols['parent_type'] = json.dumps(parent_type)
            parent_uuid = self.fq_name_to_uuid(parent_method_type,
                                               parent_fq_name)
            self._create_child(batch, parent_method_type, parent_uuid,
                               obj_type, obj_uuid)

        # Properties: absent (None) values are not written.
        for prop_name in obj_class.prop_fields:
            prop_value = obj_dict.get(prop_name)
            if prop_value is not None:
                if prop_name == 'id_perms':
                    created_at = datetime.datetime.utcnow().isoformat()
                    prop_value['created'] = created_at
                    prop_value['last_modified'] = created_at
                self._create_prop(batch, obj_uuid, prop_name, prop_value)

        # References, e.g. ref_field 'network_ipam_refs' has ref_type
        # 'network-ipam'; each one is stored with is_weakref False.
        for ref_field in obj_class.ref_fields:
            ref_type, _link_type, _ = obj_class.ref_field_types[ref_field]
            for ref in obj_dict.get(ref_field, []):
                ref_uuid = self.fq_name_to_uuid(ref_type, ref['to'])
                ref_data = {'attr': ref.get('attr'), 'is_weakref': False}
                self._create_ref(batch, obj_type, obj_uuid,
                                 ref_type.replace('-', '_'), ref_uuid,
                                 ref_data)

        batch.insert(obj_uuid, row_cols)
        batch.send()

        # fq-name table: one column named "<encoded fq_name>:<uuid>".
        encoded_fq_name = utils.encode_string(':'.join(fq_name))
        self._obj_fq_name_cf.insert(
            obj_type, {encoded_fq_name + ':' + obj_uuid: json.dumps(None)})

        return (True, '')
    def fq_name_to_uuid(self, obj_type, fq_name):
        """Resolve a fully-qualified name to the uuid stored for it.

        The fq-name table keeps columns named "<encoded fq_name>:<uuid>".
        The column range "<encoded>:" .. "<encoded>;" is scanned and the
        uuid is the text after the final ':' of the last column found.

        Raises:
            NoIdError: if the lookup raises ``pycassa.NotFoundException`` or
                no column falls in the range.
        """
        method_name = obj_type.replace('-', '_')
        encoded_name = utils.encode_string(':'.join(fq_name))
        range_start = '%s:' % (encoded_name)
        range_end = '%s;' % (encoded_name)
        try:
            found = self._obj_fq_name_cf.xget(
                method_name, column_start=range_start,
                column_finish=range_end)
        except pycassa.NotFoundException:
            raise NoIdError('%s %s' % (obj_type, fq_name))

        found = list(found)
        if not found:
            raise NoIdError('%s %s' % (obj_type, fq_name))

        # The last column in the range wins.
        col_name = found[-1][0]
        return col_name.rsplit(':', 1)[-1]
Beispiel #7
0
    def POST(self, urlkey):
        """Replace the URL stored under ``urlkey`` with the request body.

        Returns a plain-text message with status ``400 Bad request`` when the
        URL or key is too long or the URL fails ``utils.check_url``, the
        result of ``web.internalerror`` when ``model.url_modify`` reports a
        non-zero code, otherwise a JSON object holding ``n_affected``.
        """
        def reject(message):
            # Flag the response as a client error and hand back the message.
            web.ctx.status = "400 Bad request"
            return message

        raw_url = web.data()
        if len(raw_url) > config.MAX_LEN_URL:
            return reject("url too long")

        if urlkey is not None:
            if len(urlkey) > config.MAX_LEN_URLKEY:
                return reject("key too long")
            urlkey = str.lower(utils.encode_string(urlkey))

        url = utils.encode_string(raw_url)
        url_ok = utils.check_url(url)
        # Explicit comparison with False kept on purpose; a plain `not`
        # would treat other falsy return values differently.
        if False == url_ok:
            return reject("bad url")

        ret, n_affected = model.url_modify(urlkey, url)
        if ret != 0:
            return web.internalerror("db error")

        return json.dumps({'n_affected': n_affected})
Beispiel #8
0
def read_character_death_information(html):
    """Collect the "Character Deaths" rows as date/message records.

    The node four levels above the text matching "Character Deaths" is
    iterated. A child contributes a record only when 'CET' occurs in its
    text at an index greater than 1; the text up to and including 'CET'
    becomes ``date`` and the remainder becomes ``killed_by_message``.

    Args:
        html: Parsed document offering ``find(text=...)``.

    Returns:
        list of dicts with the keys ``date`` and ``killed_by_message``.
    """
    marker = 'CET'
    heading = html.find(text=re.compile(r'Character Deaths'))
    rows = heading.parent.parent.parent.parent

    deaths = []
    for row in rows:
        text = encode_string(row.get_text())
        marker_at = text.find(marker)
        if marker_at <= 1:
            # Marker missing (or too close to the start): not a death row.
            continue
        split_at = marker_at + len(marker)
        deaths.append({
            'date': text[:split_at],
            'killed_by_message': text[split_at:],
        })
    return deaths
    def object_delete(self, res_type, obj_uuid):
        """Remove an object row together with its links and fq-name entry.

        Columns of the object's row are scanned by prefix: ``parent:`` links
        are undone with ``_delete_child``, ``ref:`` and ``relaxbackref:``
        links with ``_delete_ref``. The row removal is sent as one batch,
        then the "<encoded fq_name>:<uuid>" column is removed from the
        fq-name table.

        Returns:
            The tuple ``(True, '')``.

        Raises:
            NoIdError: if reading the object's ``fq_name`` column raises
                ``pycassa.NotFoundException``.
        """
        obj_type = res_type.replace('-', '_')
        # Result is unused here; the call is kept so that any error it
        # raises for this type still surfaces.
        self._get_resource_class(obj_type)
        uuid_cf = self._obj_uuid_cf
        try:
            stored = uuid_cf.get(obj_uuid, columns=['fq_name'])
            fq_name = json.loads(stored['fq_name'])
        except pycassa.NotFoundException:
            raise NoIdError(obj_uuid)
        batch = uuid_cf.batch()

        def columns_with_prefix(prefix):
            # ';' is the character right after ':', so this range covers
            # every column name starting with "<prefix>:".
            return uuid_cf.xget(obj_uuid,
                                column_start=prefix + ':',
                                column_finish=prefix + ';')

        # unlink from parent
        for col_name, _ in columns_with_prefix('parent'):
            _, parent_type, parent_uuid = col_name.split(':')
            self._delete_child(
                batch, parent_type, parent_uuid, obj_type, obj_uuid)

        # remove refs
        for col_name, _ in columns_with_prefix('ref'):
            _, ref_type, ref_uuid = col_name.split(':')
            self._delete_ref(batch, obj_type, obj_uuid, ref_type, ref_uuid)

        # remove link from relaxed back refs
        for col_name, _ in columns_with_prefix('relaxbackref'):
            _, backref_uuid = col_name.split(':')
            self._delete_ref(batch, None, backref_uuid, obj_type, obj_uuid)

        batch.remove(obj_uuid)
        batch.send()

        # Update fqname table
        fq_name_col = utils.encode_string(':'.join(fq_name)) + ':' + obj_uuid
        self._obj_fq_name_cf.remove(obj_type, columns=[fq_name_col])

        return (True, '')
Beispiel #10
0
def read_online_players(html):
    """Return one record per row of the online-players table.

    The table is located through the first ``<tr class="LabelH">`` element:
    all ``<tr>`` elements under its parent are scanned, and the first three
    ``<td>`` cells of each row are stored as ``name``, ``level`` and
    ``vocation``.

    Each row now yields exactly one dict. Previously the dict was appended
    inside the per-cell loop, so the same record appeared once per cell.
    Rows without any ``<td>`` cell still contribute nothing.

    Args:
        html: Parsed document offering ``find_all``.

    Returns:
        list of dicts with up to the keys ``name``, ``level``, ``vocation``.

    Raises:
        IndexError: if no ``<tr class="LabelH">`` element exists.
    """
    field_names = ('name', 'level', 'vocation')
    reference_content = html.find_all("tr", {"class": "LabelH"})
    online_players_table = reference_content[0].parent.find_all('tr')

    response_character_online = []
    for player_row in online_players_table:
        cells = player_row.find_all('td')
        if not cells:
            # No data cells in this row, so there is no player to record.
            continue
        response_character_online.append({
            field: encode_string(cell.get_text())
            for field, cell in zip(field_names, cells)
        })
    return response_character_online
    def object_delete(self, res_type, obj_uuid):
        """Remove an object row together with its links and fq-name entry.

        ``parent:`` columns of the row are undone with ``_delete_child`` and
        ``ref:`` columns with ``_delete_ref``. The row removal is sent as one
        batch, then the "<encoded fq_name>:<uuid>" column is removed from
        the fq-name table.

        Returns:
            The tuple ``(True, '')``.

        Raises:
            NoIdError: if reading the object's ``fq_name`` column raises
                ``pycassa.NotFoundException``.
        """
        obj_type = res_type.replace('-', '_')
        # Result is unused here; the call is kept so that any error it
        # raises for this type still surfaces.
        self._get_resource_class(obj_type)
        uuid_cf = self._obj_uuid_cf
        try:
            stored = uuid_cf.get(obj_uuid, columns=['fq_name'])
            fq_name = json.loads(stored['fq_name'])
        except pycassa.NotFoundException:
            raise NoIdError(obj_uuid)
        batch = uuid_cf.batch()

        # unlink from parent
        parent_cols = uuid_cf.xget(obj_uuid,
                                   column_start='parent:',
                                   column_finish='parent;')
        for col_name, _ in parent_cols:
            _, parent_type, parent_uuid = col_name.split(':')
            self._delete_child(batch, parent_type, parent_uuid, obj_type,
                               obj_uuid)

        # remove refs
        ref_cols = uuid_cf.xget(obj_uuid,
                                column_start='ref:',
                                column_finish='ref;')
        for col_name, _ in ref_cols:
            _, ref_type, ref_uuid = col_name.split(':')
            self._delete_ref(batch, obj_type, obj_uuid, ref_type, ref_uuid)

        batch.remove(obj_uuid)
        batch.send()

        # Update fqname table
        fq_name_col = utils.encode_string(':'.join(fq_name)) + ':' + obj_uuid
        self._obj_fq_name_cf.remove(obj_type, columns=[fq_name_col])

        return (True, '')
    def object_create(self, res_type, obj_id, obj_dict):
        """Write a new object row and register it in the fq-name table.

        Properties are stored in one of three ways, depending on the
        resource class: list properties element by element (keyed by their
        position), map properties element by element (keyed by the class's
        map key name), and everything else as a single property.

        Args:
            res_type: Resource type name; dashes are replaced by underscores
                to form the object type.
            obj_id: Row key under which the object is stored.
            obj_dict: Object description. ``fq_name`` is required;
                ``parent_type``, property fields and ``*_refs`` lists are
                read when present. An ``id_perms`` value is updated in place
                with ``created`` / ``last_modified`` timestamps.

        Returns:
            The tuple ``(True, '')``.
        """
        obj_type = res_type.replace('-', '_')
        obj_class = self._get_resource_class(obj_type)

        # Gather column values for obj and updates to backrefs
        # in a batch and write it at the end
        bch = self._obj_uuid_cf.batch()

        obj_cols = {}
        obj_cols['fq_name'] = json.dumps(obj_dict['fq_name'])
        obj_cols['type'] = json.dumps(obj_type)
        if 'parent_type' in obj_dict:
            # non config-root child
            parent_type = obj_dict['parent_type']
            parent_method_type = parent_type.replace('-', '_')
            parent_fq_name = obj_dict['fq_name'][:-1]
            obj_cols['parent_type'] = json.dumps(parent_type)
            parent_uuid = self.fq_name_to_uuid(parent_method_type,
                                               parent_fq_name)
            self._create_child(bch, parent_method_type, parent_uuid,
                               obj_type, obj_id)

        # Properties
        for prop_field in obj_class.prop_fields:
            field = obj_dict.get(prop_field)
            # Specifically checking for None
            if field is None:
                continue
            if prop_field == 'id_perms':
                field['created'] = datetime.datetime.utcnow().isoformat()
                field['last_modified'] = field['created']

            if prop_field in obj_class.prop_list_fields:
                # store list elements in list order
                # iterate on wrapped element or directly on prop field
                if obj_class.prop_list_field_has_wrappers[prop_field]:
                    # list(...) rather than .keys()[0]: dict views cannot be
                    # indexed on Python 3; the result is the same on 2.
                    wrapper_field = list(field)[0]
                    list_coll = field[wrapper_field]
                else:
                    list_coll = field

                for position, list_elem in enumerate(list_coll):
                    self._add_to_prop_list(
                        bch, obj_id, prop_field, list_elem, str(position))
            elif prop_field in obj_class.prop_map_fields:
                # iterate on wrapped element or directly on prop field
                if obj_class.prop_map_field_has_wrappers[prop_field]:
                    wrapper_field = list(field)[0]
                    map_coll = field[wrapper_field]
                else:
                    map_coll = field

                map_key_name = obj_class.prop_map_field_key_names[prop_field]
                for map_elem in map_coll:
                    map_key = map_elem[map_key_name]
                    self._set_in_prop_map(
                        bch, obj_id, prop_field, map_elem, map_key)
            else:
                self._create_prop(bch, obj_id, prop_field, field)

        # References
        # e.g. ref_field = 'network_ipam_refs'
        #      ref_type = 'network-ipam'
        #      ref_link_type = 'VnSubnetsType'
        #      is_weakref = False
        for ref_field in obj_class.ref_fields:
            ref_type, _ref_link_type, _ = obj_class.ref_field_types[ref_field]
            refs = obj_dict.get(ref_field, [])
            for ref in refs:
                ref_uuid = self.fq_name_to_uuid(ref_type, ref['to'])
                ref_attr = ref.get('attr')
                ref_data = {'attr': ref_attr, 'is_weakref': False}
                self._create_ref(bch, obj_type, obj_id,
                                 ref_type.replace('-', '_'), ref_uuid,
                                 ref_data)

        bch.insert(obj_id, obj_cols)
        bch.send()

        # Update fqname table: one column named "<encoded fq_name>:<obj_id>"
        fq_name_str = ':'.join(obj_dict['fq_name'])
        fq_name_cols = {utils.encode_string(fq_name_str) + ':' + obj_id:
                        json.dumps(None)}
        self._obj_fq_name_cf.insert(obj_type, fq_name_cols)

        return (True, '')
Beispiel #13
0
# Score every row of x using only the trees whose row set (t_rows[key]) does
# not contain that row's index. The last element of each row is its target.
for i, row in enumerate(x):
    row = list(row)
    target = row.pop()

    # Vote tally per class label.
    d = {4: 0, 5: 0, 6: 0}
    for key, t_row in t_rows.items():
        if i in t_row:
            continue
        d[predict(roots[key], row, t_columns[key])] += 1

    # Rows that received no vote at all are left out of the accuracy.
    if any(d.values()):
        total += 1
        # First label with the highest count wins, in dict order.
        if max(d, key=d.get) == target:
            cnt += 1
print("accuracy ", cnt / total)
"""
Predictions are made on the testing set using all the trees
"""
# Write one "id,class" line per row of testing.csv, where the class is the
# majority vote over all trees. The `with` block guarantees the output file
# is closed even if loading, predicting or decoding raises.
with open('prediction.csv', 'w') as f:
    f.write("id,class\n")
    y = load_data('testing.csv')
    for row in y:
        idx = row[0]
        seq = encode_string(row[1])

        # Vote tally per class label.
        d = {4: 0, 5: 0, 6: 0}
        for i, root in enumerate(roots):
            d[predict(root, seq, t_columns[i])] += 1

        # First label with the highest count wins, in dict order.
        prediction = decode(max(d.items(), key=operator.itemgetter(1))[0])
        f.write(str(idx) + "," + prediction + "\n")
Beispiel #14
0
def read_guild_information(html):
    """Return the text of the guild information container.

    Args:
        html: Parsed document offering ``find``.

    Returns:
        dict with the single key ``guild_information``, holding the text of
        the ``<div id="GuildInformationContainer">`` element after
        ``encode_string``.
    """
    container = html.find('div', {'id': 'GuildInformationContainer'})
    return {'guild_information': encode_string(container.get_text())}
Beispiel #15
0
    ax2.set_xlabel("Word Offset")
    
    plt.savefig('key_words_fig.pdf', format='pdf')

if __name__ == '__main__':
    # Usage: <script> TEXT_SOURCE [SECOND_ARG]
    # SECOND_ARG, when given, is converted to int and passed to summarize().
    if len(sys.argv) <= 1:
        print('There is no text to summarize')
        sys.exit(1)

    text = utils.get_text(sys.argv[1])
    if len(sys.argv) > 2:
        print(summarize(text, int(sys.argv[2])))
    else:
        print(summarize(text))

    # Keep asking until the user answers y/Y (draw the plot) or n/N (skip).
    while True:
        is_drawing = raw_input("Want to draw key words dispersion plot?[y/n]:")
        if is_drawing in ("y", "Y"):
            keywords_ranking = word_ranking(text).most_common(15)
            words, scores = list(zip(*keywords_ranking))
            x, y = dispersion(text, words)
            words = [utils.encode_string(w) for w in words]
            draw(words, scores, x, y)
            break
        if is_drawing in ("n", "N"):
            break
        print("Incorrect command.")

    sys.exit(0)