예제 #1
0
def tagging_function(file_name, label_file, images, size, collection):
    """Annotate one stored image from the uploaded label file that matches it.

    A label file matches when its name is ``<stem>.<ext>`` (exactly one dot)
    and ``<stem>`` equals the part of ``file_name`` before its first dot.
    ``xml`` labels are handed to ``clf.segmentation``, ``json`` labels to
    ``cjf.segmentation``; any other extension falls back to ``clf.default``.

    :param file_name: name of the uploaded image
    :param label_file: mapping of label file name -> base64-encoded content
    :param images: image object passed through to the annotation helpers
    :param size: image size passed through to the annotation helpers
    :param collection: collection passed through to ``clf.segmentation``
    :return: None
    """
    image_stem = file_name.split('.')[0]
    for l_name, l_file in label_file.items():
        name_parts = l_name.split('.')
        # A name without an extension used to raise IndexError here. Such a
        # name (like a multi-dot name) can never equal "<stem>.<ext>", so it
        # is simply skipped.
        if len(name_parts) != 2 or name_parts[0] != image_stem:
            continue

        extension = name_parts[1]
        if extension == 'xml':
            clf.segmentation(images, size, base64.b64decode(l_file),
                             collection)
        elif extension == 'json':
            cjf.segmentation(images, size, base64.b64decode(l_file))
        else:
            clf.default(images, size)
예제 #2
0
def spider(
    spider_dict
):
    """Crawler task: download the crawled images and store them.

    Every URL returned by ``spider_task`` is downloaded, uploaded through
    ``fun.upload``, saved as an ``Image`` row of the target collection and
    given a default annotation. Duplicate images in the collection are
    cleaned once at the end.

    :param spider_dict: JSON string with the crawl parameters the backend
        collected from the user's form (keyword, spider_page_num, start_page,
        number); ``collection_id`` is read from it as well
    :return: list of the results returned by ``fun.upload``
    """
    from contextlib import closing

    from DataSet.rabbitMQ.task_queue_client import spider_task
    with app.app_context():
        site_list = list()
        print(spider_dict)
        spider_image_list = json.loads(spider_task(spider_dict))
        print(spider_image_list)
        if not spider_image_list:
            return site_list

        # The collection id is the same for every image, so parse it once.
        collection_id = json.loads(spider_dict)['collection_id']
        print(collection_id)

        for spider_image in spider_image_list:
            # closing() releases the HTTP handle even if the upload fails.
            with closing(urllib2.urlopen(spider_image)) as remote:
                site = fun.upload(remote.read(), file_ext_name='jpg')
            site_list.append(site)

            # Store only the image uploaded in this iteration. The previous
            # version looped over the whole of site_list here and therefore
            # re-inserted every earlier upload on each pass.
            image = Image()
            image.name = site.get('filename')
            image.site = site.get('file_id')
            image.collection_id = collection_id
            db.session.add(image)
            db.session.commit()
            # NOTE(review): looked up by name as before; this assumes the
            # uploaded file names are unique -- confirm.
            images = Image.query.filter_by(name=image.name).first()

            image_url_path = current_app.config[
                'NGINX_SITE'] + fun.getInfo(images.site)['group_file_id']

            with closing(urllib2.urlopen(image_url_path)) as stored:
                size = image_size(stored.read())

            clf.default(images, size)

        # One clean-up pass after all images are stored.
        same_images_clean(collection_id)

        return site_list
예제 #3
0
def upload_image(collection_name):
    """
    Upload images to a classification / face collection.

    The ``status`` form field selects what happens:

    * ``add_label`` -- create a label from the ``new_label`` field, which is
      formatted ``<label id>:<label name>``;
    * ``appoint_table`` -- label each image through the uploaded table file,
      one ``<image name>:<label name>`` entry per line;
    * ``default`` -- store the images without a label;
    * ``<label id>:<label name>`` -- label every image with that label.

    :param collection_name: name of a collection owned by the current user
    :return: JSON response with an error code and a description
    """
    local_time = time.time()
    try:
        collection = Collection.query.filter_by(
            user_id=g.user.id, name=collection_name).first()  # current collection
        if collection is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='没有该数据集')
    except Exception as e:
        current_app.logger.error(e)
        return jsonify(err_no=RET.PARAMERR, err_desc='参数有误')

    # .get() lets a missing field reach the explicit check below; indexing
    # the form raised before the check could ever run.
    status = request.form.get('status')

    if not status:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数不完整')
    upload_image_site = request.files.getlist('upload_image_site')  # image files
    table_site = request.files.get('table_site')  # table file
    # Compared as text: the id arrives from the form as a string, so a
    # membership test against the stored ids themselves never matched.
    known_ids = set(str(label.label_id) for label in collection.labels)
    label_name = [l_name.name.encode('utf-8') for l_name in collection.labels]
    if status == 'add_label':
        # Add a new label.
        new_label = request.form.get('new_label')
        if not new_label:
            return jsonify(err_no=RET.NODATA, err_desc='名称参数缺失')

        label_parts = new_label.split(':')
        if len(label_parts) < 2:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数有误')
        new_id, new_name = label_parts[0], label_parts[1]

        if new_id in known_ids or new_name in label_name:
            return jsonify(err_no=RET.NODATA, err_desc='该标签或id以存在')

        try:
            label = Label()
            label.name = new_name
            label.label_id = new_id
            label.collection_id = collection.id
            db.session.add(label)
            db.session.commit()
        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            # Report the failure instead of falling through to the success
            # response.
            return jsonify(err_no=RET.DBERR, err_desc='标签保存失败')
        return jsonify(err_no='ok')

    # NOTE(review): the responses below that use errno/errmsg are kept as
    # they were; the rest of this view uses err_no/err_desc -- confirm which
    # keys the client reads before unifying them.
    if not upload_image_site:
        return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')

    if status == 'appoint_table':
        # Label through the table file.
        if table_site is None:
            return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')
        try:
            # image name -> label name; the first entry for a name wins.
            table = {}
            for line in table_site.read().splitlines():
                image_name, separator, wanted_label = line.partition(':')
                if separator:
                    table.setdefault(image_name, wanted_label)

            for up_file in upload_image_site:
                images = storage(up_file, collection)
                size = image_size(up_file)
                labels = None
                wanted_label = table.get(up_file.filename)
                if wanted_label is not None:
                    # Only labels of this collection are valid targets.
                    labels = Label.query.filter_by(
                        collection_id=collection.id,
                        name=wanted_label).first()
                if labels is not None:
                    # Labelled upload.
                    clf.classification(images, size, labels.label_id,
                                       labels.name)
                else:
                    # Not listed in the table (or unknown label): the image
                    # is stored unlabelled instead of failing the upload.
                    clf.default(images, size)
        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    if status == 'default':
        # Unlabelled upload.
        try:
            for up_file in upload_image_site:
                images = storage(up_file, collection)
                size = image_size(up_file)
                clf.default(images, size)
        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    # Remaining form: "<label id>:<label name>".
    status_parts = status.split(':')
    if len(status_parts) < 2 or status_parts[1] not in label_name:
        return jsonify(err_no=RET.DBERR, err_desc='无效参数')

    try:
        requested_id = int(status_parts[0])
    except ValueError:
        return jsonify(errno=RET.PARAMERR, errmsg='错误的标签ID')

    # Labelled upload.
    try:
        label_data = Label.query.filter_by(
            collection_id=collection.id, name=status_parts[1]).first()
        if label_data is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='没有此标签')

        if requested_id != label_data.label_id:
            return jsonify(errno=RET.PARAMERR, errmsg='错误的标签ID')
        for up_file in upload_image_site:
            images = storage(up_file, collection)
            size = image_size(up_file)
            clf.classification(images, size,
                               status_parts[0],
                               status_parts[1])
    except Exception as e:
        current_app.logger.error(e)
        db.session.rollback()
        return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')

    # The cookie may be absent; slicing None raised after the upload had
    # already succeeded.
    request_id = (request.cookies.get('session') or '')[:36]
    time_used = int((time.time() * 1000) - int(local_time * 1000))
    return jsonify(err_no=RET.OK,
                   err_desc='OK',
                   dataset_id=collection.id,
                   request_id=request_id,
                   time_used=time_used)
예제 #4
0
def upload_images(collection_name):
    """
    Upload images to a detection / segmentation collection.

    The ``status`` form field selects what happens:

    * ``default`` -- store the images without annotations;
    * field absent -- pair each image with the uploaded label file named
      ``<image stem>.<ext>``; images without such a file are stored
      unannotated;
    * ``appoint_table`` -- pair images with label files through the uploaded
      table file, one ``<image name>:<label file name>`` entry per line.

    ``xml`` label files go to ``clf.segmentation``, ``json`` label files to
    ``cjf.segmentation``; other extensions fall back to ``clf.default``.

    :param collection_name: name of a collection owned by the current user
    :return: JSON response with an error code and a description
    """
    local_time = time.time()
    try:
        collection = Collection.query.filter_by(
            user_id=g.user.id, name=collection_name).first()  # current collection
        if collection is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='没有该数据集')
    except Exception as e:
        current_app.logger.error(e)
        return jsonify(err_no=RET.PARAMERR, err_desc='参数有误')

    status = request.form.get('status')

    upload_image_site = request.files.getlist('upload_image_site')  # image files
    label_file = request.files.getlist('label_file')  # label files
    table_site = request.files.get('table_site')  # table file

    def annotate(images, size, l_file):
        # Choose the annotator from the label file's extension.
        name_parts = l_file.filename.rsplit('.', 1)
        extension = name_parts[1] if len(name_parts) == 2 else ''
        if extension == 'xml':
            clf.segmentation(images, size, l_file, collection)
        elif extension == 'json':
            cjf.segmentation(images, size, l_file)
        else:
            clf.default(images, size)

    if status == 'default':
        # Unannotated upload.
        try:
            if not upload_image_site:
                return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')
            for up_file in upload_image_site:
                images = storage(up_file, collection)
                size = image_size(up_file)
                clf.default(images, size)

        except Exception as e:
            current_app.logger.error(e)
            db.session.rollback()
            return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    if not upload_image_site and not label_file:
        return jsonify(err_no=RET.PARAMERR, err_desc='缺少图片文件或标记文件')

    if status is None:
        # Annotated upload without a table: match label files by name stem.
        label_stems = set(
            l_file.filename.split('.')[0] for l_file in label_file)
        for up_file in upload_image_site:
            try:
                images = storage(up_file, collection)
                size = image_size(up_file)
                image_stem = up_file.filename.split('.')[0]
                if image_stem in label_stems:
                    for l_file in label_file:
                        # Only "<stem>.<ext>" names match; a name without
                        # an extension used to raise IndexError here.
                        name_parts = l_file.filename.split('.')
                        if len(name_parts) == 2 and \
                                name_parts[0] == image_stem:
                            annotate(images, size, l_file)
                else:
                    # No label file for this image.
                    clf.default(images, size)

            except Exception as e:
                current_app.logger.error(e)
                db.session.rollback()
                return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')
        return jsonify(err_no=RET.OK, err_desc='OK')

    if not status:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数不完整')

    if status != 'appoint_table':
        # Unknown modes used to fall through and report success without
        # doing anything.
        return jsonify(err_no=RET.PARAMERR, err_desc='无效参数')

    # Pair images and label files through the table file.
    if not table_site:
        return jsonify(errno=RET.PARAMERR, errmsg='参数缺失')
    try:
        # image name -> label file name; blank or malformed lines (for
        # example a trailing newline) are skipped instead of failing the
        # whole upload. The first entry for a name wins.
        table = {}
        for line in table_site.read().splitlines():
            image_name, separator, label_file_name = line.partition(':')
            if separator:
                table.setdefault(image_name, label_file_name)

        uploaded_labels = {}
        for l_file in label_file:
            uploaded_labels.setdefault(l_file.filename, l_file)

        for up_file in upload_image_site:
            images = storage(up_file, collection)
            size = image_size(up_file)
            # Use the label file the table names for this image. The table
            # entry used to be checked for existence only, after which the
            # files were matched by name stem and the mapping was ignored.
            l_file = uploaded_labels.get(table.get(up_file.filename))
            if l_file is not None:
                annotate(images, size, l_file)
            else:
                clf.default(images, size)
    except Exception as e:
        current_app.logger.error(e)
        db.session.rollback()
        return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')

    # The cookie may be absent; slicing None raised after the upload had
    # already succeeded.
    request_id = (request.cookies.get('session') or '')[:36]
    time_used = int((time.time() * 1000) - int(local_time * 1000))
    return jsonify(err_no=RET.OK,
                   err_desc='OK',
                   dataset_id=collection.id,
                   request_id=request_id,
                   time_used=time_used)
예제 #5
0
def upload_image():
    """
    Crawler configuration: start a crawl for a collection.

    Reads ``collection_id``, ``number`` and either ``keywords`` (separated by
    ``;``) or ``image`` from the JSON body. For keyword search one ``spider``
    task is queued per keyword; the files each task returns are saved as
    ``Image`` rows with a default annotation, and a duplicate clean-up task
    is queued afterwards. The search-by-image branch only validates its
    input.

    :return: JSON response with ``err_no`` and ``err_desc``
    """
    from contextlib import closing

    from DataSet.celery_tasks.tasks import spider, same_images_clean
    data = request.get_json()
    print(data)
    if not data:
        return jsonify(err_no=RET.PARAMERR, err_desc= '参数缺失')
    collection_id = data.get('collection_id')
    if collection_id is None:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
    try:
        # first() must be called: the bound method itself is never None, so
        # the check below could not fire.
        collection = Collection.query.filter_by(id=collection_id).first()
        if collection is None:
            return jsonify(err_no=RET.PARAMERR, err_desc='没有该数据集')
    except Exception as e:
        current_app.logger.error(e)
        return jsonify(err_no=RET.PARAMERR, err_desc='参数有误')

    # Validate before doing arithmetic: int(None) / int('abc') used to raise
    # ahead of the "missing parameter" check.
    try:
        number = int(data.get('number'))
    except (TypeError, ValueError):
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
    if number <= 0:
        return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')
    # Floor division keeps the page count an integer.
    spider_page_num = number // 60 + 1

    keywords = data.get('keywords')
    # Search by keyword.
    if keywords:
        pending_results = []
        for keyword in keywords.split(';'):
            if not keyword:
                # e.g. a trailing ';'
                continue
            spider_dict = {
                'keyword': keyword,
                'spider_page_num': spider_page_num,
                'start_page': 1,
                'number': number,
                'collection_id': collection_id,
            }
            pending_results.append(spider.delay(json.dumps(spider_dict)))
        if not pending_results:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')

        # Consume every keyword's result; only the last task's result used
        # to be read.
        for pending in pending_results:
            spider_save_list = pending.get()
            print(spider_save_list)

            # Store in the database.
            # NOTE(review): confirm the spider task does not already create
            # these rows itself.
            try:
                for spider_file in spider_save_list:
                    image = Image()
                    image.name = spider_file.get('filename')
                    image.site = spider_file.get('file_id')
                    image.collection_id = collection_id
                    db.session.add(image)
                    db.session.commit()
                    images = Image.query.filter_by(name=image.name).first()

                    image_url_path = current_app.config['NGINX_SITE'] + \
                        fun.getInfo(images.site)['group_file_id']

                    # closing() releases the HTTP handle after the read.
                    with closing(urllib2.urlopen(image_url_path)) as stored:
                        size = image_size(stored.read())

                    clf.default(images, size)
            except Exception as e:
                current_app.logger.error(e)
                db.session.rollback()
                return jsonify(err_no=RET.DBERR, err_desc='图片保存失败')

        collection_id = int(collection_id)

        same_images_clean.delay(collection_id)

    # Search by image.
    else:
        image = data.get('image')
        print(image)
        if not image:
            return jsonify(err_no=RET.PARAMERR, err_desc='参数缺失')

    return jsonify(err_no=RET.OK, err_desc='爬取结束')
예제 #6
0
def classification(status, table_site, upload_image_site, collection,
                   label_name):
    """
    Store and label images for a classification / face collection.

    ``status`` selects the mode: ``appoint_table`` labels each image through
    the table text, ``default`` stores images unlabelled, and
    ``<label id>:<label name>`` applies that label to every image. With any
    other status each image is stored and then removed again.

    :param status: mode selector, see above
    :param table_site: table text, one ``<image name>:<label name>`` entry
        per line
    :param upload_image_site: mapping of image name -> base64-encoded image
    :param collection: collection id
    :param label_name: collection of valid label names
    :return: JSON-formatted string with the result and failure counts
    """
    with app.app_context():
        collections = Collection.query.filter_by(id=collection).first()
        if collections is None:
            return '{"err_no": "1", "err_desc": "没有该数据集"}'

        # Everything that does not depend on the individual image is checked
        # before the loop, so a bad request is rejected before any image has
        # been stored.
        status_parts = status.split(':') if status else []
        table = {}
        if status == 'appoint_table':
            if table_site is None:
                return '{"err_no": "1", "err_desc": "参数缺失"}'
            mode = 'table'
            # image name -> label name; the first entry for a name wins.
            for line in table_site.splitlines():
                image_name, separator, wanted_label = line.partition(':')
                if separator:
                    table.setdefault(image_name, wanted_label)
        elif status == 'default':
            mode = 'default'
        elif len(status_parts) > 1 and status_parts[1] in label_name:
            label_data = Label.query.filter_by(
                collection_id=collection,
                label_id=status_parts[0]).first()
            if label_data is None:
                return '{"err_no": "1", "err_desc": "没有此标签"}'
            if status_parts[1] != label_data.name:
                return '{"err_no": "1", "err_desc": "错误的标签ID"}'
            mode = 'label'
        else:
            mode = 'invalid'

        error_list = []
        for file_name, up_file in upload_image_site.items():
            up_file = base64.b64decode(up_file)
            images = storage(up_file, collections, file_name)
            size = image_size(up_file)

            if mode == 'invalid':
                fun.remove(images.site)
                db.session.delete(images)
                db.session.commit()
                continue

            try:
                if mode == 'table':
                    labels = None
                    wanted_label = table.get(file_name)
                    if wanted_label is not None:
                        # Only labels of this collection are valid targets.
                        labels = Label.query.filter_by(
                            collection_id=collection,
                            name=wanted_label).first()
                    if labels is not None:
                        # Labelled upload.
                        clf.classification(images, size, labels.label_id,
                                           labels.name)
                    else:
                        # Not listed in the table (or unknown label): keep
                        # the image unlabelled instead of counting it as a
                        # failure.
                        clf.default(images, size)
                elif mode == 'default':
                    # Unlabelled upload.
                    clf.default(images, size)
                else:
                    # Labelled upload.
                    clf.classification(images, size,
                                       status_parts[0],
                                       status_parts[1])
            except Exception as e:
                current_app.logger.error(e)
                error_list.append(e)
                # Same clean-up for every mode: reset the session first so
                # the delete below can run, then drop both the stored file
                # and its row.
                db.session.rollback()
                fun.remove(images.site)
                db.session.delete(images)
                db.session.commit()

        same_images_clean(collection)

        # NOTE(review): error_list is interpolated with %s, so the result is
        # not guaranteed to be valid JSON when an error message contains
        # double quotes -- confirm how callers parse it.
        return '{"err_no": "0", "err_desc": "OK", "上传张数": "%s", "失败张数": "%s", "错误名称": "%s"}' % \
               (len(upload_image_site), len(error_list), error_list)
예제 #7
0
def detection(table_site, upload_image_site, label_file, collection):
    """
    Store and annotate images for a detection / segmentation collection.

    The mode follows from which arguments are supplied:

    * table and label files -> ``ap_table``: an image is annotated when the
      table lists it and the label file named there was uploaded;
    * label files only -> ``label``: an image is annotated when a label file
      shares its name stem;
    * neither -> ``no_label``: images get the default annotation.

    :param table_site: table text, one ``<image name>:<label file name>``
        entry per line
    :param upload_image_site: mapping of image name -> base64-encoded image
    :param label_file: mapping of label file name -> base64-encoded content
    :param collection: collection id
    :return: JSON-formatted string with the result, failure counts and mode
    """
    with app.app_context():
        collection_id = collection
        collection = Collection.query.filter_by(id=collection).first()
        if collection is None:
            # Rejected up front; this used to be detected only after the
            # first image had been handed to storage().
            return '{"err_no": "1", "err_desc": "参数缺失"}'

        label_file_name = set()  # label file names, with extension
        label_list = set()  # label file names, without extension
        if upload_image_site and label_file:
            for l_name in label_file:
                label_file_name.add(l_name)
                label_list.add(l_name.split('.')[0])

        # The mode does not depend on the individual image.
        if table_site and label_file:
            selected = 'ap_table'
        elif label_file:
            selected = 'label'
        else:
            selected = 'no_label'

        # image name -> label file name, parsed once. Blank or malformed
        # lines (for example a trailing newline) are skipped; they used to
        # raise inside the per-image try and leave a half-parsed table for
        # the remaining images. The first entry for a name wins.
        table = {}
        if selected == 'ap_table':
            for line in table_site.splitlines():
                image_name, separator, wanted_file = line.partition(':')
                if separator:
                    table.setdefault(image_name, wanted_file)

        error_list = []
        method = 'default'  # reported when no image was uploaded
        for file_name, up_file in upload_image_site.items():
            up_file = base64.b64decode(up_file)
            images = storage(up_file, collection, file_name)
            size = image_size(up_file)
            method = selected

            try:
                if selected == 'ap_table':
                    # Table mode.
                    # NOTE(review): tagging_function pairs files by name
                    # stem, so the label file named in the table is only
                    # used when it shares the image's stem -- confirm this
                    # is intended.
                    if table.get(file_name) in label_file_name:
                        tagging_function(file_name, label_file, images, size,
                                         collection)
                    else:
                        clf.default(images, size)
                elif selected == 'label':
                    # Annotated upload without a table.
                    if file_name.split('.')[0] in label_list:
                        tagging_function(file_name, label_file, images, size,
                                         collection)
                    else:
                        clf.default(images, size)
                else:
                    # Unannotated upload.
                    clf.default(images, size)
            except Exception as e:
                fun.remove(images.site)
                current_app.logger.error(e)
                error_list.append(e)

        same_images_clean(collection_id)

        return '{"err_no": "0", "err_desc": "OK", "上传张数": "%s", "失败张数": "%s", "错误名称": "%s", "method":"%s"}' % \
               (len(upload_image_site), len(error_list), error_list, method)