예제 #1
0
        iob = []
        is_inside = False
        for w in pos:
            if not re.search("<.*>", w):
                if is_inside:
                    iob.append("{}/I-T".format(w))
                    is_inside = False
                else:
                    iob.append("{}/O".format(w))
            elif iob and w == tag:
                iob[-1] = iob[-1].replace('/O', '/B-T')
                is_inside = True
            else:
                # Other tags - skip them
                pass
        return " ".join(iob)

    def is_self_closing_tag(self, tag):
        """Check whether ``tag`` starts with a self-closing tag such as ``<br/>``.

        The pattern is only anchored at the beginning of the string, so any
        text following the closing ``/>`` is ignored.

        :param tag: string to test
        :return: the ``re`` match object (truthy) on success, ``None`` otherwise
        """
        self_closing = re.compile('<[^<>]+/>')
        return self_closing.match(tag)


if __name__ == "__main__":
    from Config.Config import G_CONFIG
    G_CONFIG.config_logging()

    # The first command-line argument is handed to Task unchanged
    task = Task(sys.argv[1])
    # Launch RDD parallel processing: apply a PosTagTask instance to the
    # partitions of the task's RDD (presumably a PySpark RDD - confirm against
    # Task.get_rdd), then pass every resulting partition to Task.save_segments
    tagged_rdd = task.get_rdd().mapPartitionsWithIndex(PosTagTask(task))
    tagged_rdd.foreachPartition(Task.save_segments)
    task.finalize()
예제 #2
0
from flask_principal import Principal
from flask_jwt import JWT

from celery import Celery

from datetime import timedelta

from Config.Config import G_CONFIG

import os

# Create the Flask application object for this module
app = Flask(__name__)
# Signing secret. A literal committed to source control is known to anyone who
# can read the code, so prefer the SECRET_KEY environment variable when it is
# set and non-empty. The historical literal stays as the fallback so existing
# setups keep working unchanged.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or 'super-secret'
app.config['VERSION'] = 1
# Re-raise exceptions instead of routing them to the app's error handlers
app.config['PROPAGATE_EXCEPTIONS'] = True

# Setup logging: G_CONFIG.config_logging() may return a handler; attach it to
# the application logger only when it returned something truthy
handler = G_CONFIG.config_logging()
if handler:
    app.logger.addHandler(handler)

# Add a stream logger (StreamHandler() without arguments writes to sys.stderr)
# that accepts records from DEBUG level upwards
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)
app.logger.addHandler(stream_handler)

# Also attach the handlers of the "gunicorn.error" logger, which gives the
# application access to the gunicorn error log facility
app.logger.handlers.extend(logging.getLogger("gunicorn.error").handlers)

# Attach the Flask-Principal extension to the application
principals = Principal(app)

# Celery configuration: broker and result backend both point at the same
# local Redis URL
app.config.update(
    CELERY_BROKER_URL='redis://localhost:6379/0',
    CELERY_RESULT_BACKEND='redis://localhost:6379/0',
)
# Initialize Celery, named after the Flask app and using the configured broker
celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'])