Ejemplo n.º 1
0
def align(event, context):
    """Align two sequences with Hirschberg and upload the run record to S3.

    A first record (inputs and start time) is uploaded before the alignment
    runs. After the alignment finishes, the same object key is uploaded
    again with the aligned sequences, score, finish time and duration.

    Args:
        event: Mapping whose ``"body"`` entry is a JSON string providing the
            keys ``s1``, ``s2``, ``t1``, ``t2``, ``service``,
            ``concurrence``, ``repetition``, ``execid`` and ``bucket``.
        context: Not used by this function.

    Returns:
        dict: ``{'statusCode': 200, 'body': '{"result": "done"}'}``.

    Raises:
        KeyError: If ``event`` or the decoded body lacks a required key.
    """
    dt = json.loads(event["body"])
    _s1 = dt["s1"]
    _s2 = dt["s2"]
    _t1 = dt["t1"]
    _t2 = dt["t2"]
    _service = dt["service"]
    _concurrence = dt["concurrence"]
    _repetition = dt["repetition"]
    exec_id = dt["execid"]
    s1 = list(_s1)
    s2 = list(_s2)
    # The clock starts here, so the reported duration also covers the bucket
    # check and the first upload, not only the alignment itself.
    started_at = datetime.datetime.now()
    bucket = dt["bucket"]
    try:
        s3_client.create_bucket(Bucket=bucket)
    except Exception:
        # Best effort: any failure is treated as "bucket already there".
        # A genuine problem (e.g. permissions) will surface at the upload
        # below. `Exception` rather than a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        print("bucket exists")

    local_id = "%.20f" % time.time()
    _output_filename = output_filename + str(exec_id) + "_" + str(local_id)
    _output_path = (str(_service) + "/repetition_" + str(_repetition) +
                    "/concurrence_" + str(_concurrence) + "/")
    local_path = "/tmp/" + _output_filename
    object_key = _output_path + _output_filename + ".json"

    def _publish(record):
        # Serialize `record` to the temp file, then upload it to the bucket.
        # The context manager closes the handle even if serialization fails.
        with open(local_path, "w") as fh:
            fh.write(json.dumps(record))
        s3_client.upload_file(local_path, bucket, object_key)

    _publish({
        "id": exec_id,
        "service": _service,
        "started_at": str(started_at),
        "s1": {
            "content": _s1,
            "title": _t1,
            "length": len(s1)
        },
        "s2": {
            "content": _s2,
            "title": _t2,
            "length": len(s2)
        },
        "algorithm": algorithm
    })

    h = Hirschberg()
    align_s1, align_s2 = h.align(s1, s2)
    score = h.score(align_s1, align_s2)

    finished_at = datetime.datetime.now()
    duration = str(finished_at - started_at)

    # Same key as the first upload: the final record replaces the initial one.
    _publish({
        "id": exec_id,
        "service": _service,
        "started_at": str(started_at),
        "finished_at": str(finished_at),
        "s1": {
            "content": _s1,
            "title": _t1,
            "length": len(s1),
            "align": align_s1
        },
        "s2": {
            "content": _s2,
            "title": _t2,
            "length": len(s2),
            "align": align_s2
        },
        "duration": duration,
        "score": score,
        "algorithm": algorithm
    })
    return {'statusCode': 200, 'body': json.dumps({"result": "done"})}
Ejemplo n.º 2
0
def hAlign(a, b):
    """Align ``a`` and ``b`` with a fresh Hirschberg instance.

    Returns whatever ``Hirschberg.score`` yields for the aligned pair.
    """
    aligner = Hirschberg()
    aligned_a, aligned_b = aligner.align(a, b)
    alignment_score = aligner.score(aligned_a, aligned_b)
    return alignment_score
Ejemplo n.º 3
0
def align(event):
    """Align two sequences with Hirschberg and upload the run record to a bucket.

    A first record (inputs and start time) is uploaded before the alignment
    runs. After the alignment finishes, the same blob name is uploaded again
    with the aligned sequences, score, finish time and duration.

    Args:
        event: Mapping providing the keys ``s1``, ``s2``, ``t1``, ``t2``,
            ``service``, ``concurrence``, ``repetition``, ``execid`` and
            ``bucket``.

    Returns:
        dict: ``{'statusCode': 200, 'body': '{"result": "done"}'}``.

    Raises:
        KeyError: If ``event`` lacks a required key.
    """
    tempPath = "/tmp/"
    _s1 = event["s1"]
    _s2 = event["s2"]
    _t1 = event["t1"]
    _t2 = event["t2"]
    _service = event["service"]
    _concurrence = event["concurrence"]
    _repetition = event["repetition"]
    exec_id = event["execid"]
    s1 = list(_s1)
    s2 = list(_s2)
    # The clock starts here, so the reported duration also covers the bucket
    # lookup and the first upload, not only the alignment itself.
    started_at = datetime.datetime.now()
    bucket_name = event["bucket"]

    try:
        bucket = storage_client.get_bucket(bucket_name)
    except Exception:
        # Any lookup failure is treated as "bucket missing" and triggers a
        # create attempt. `Exception` rather than a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        bucket = storage_client.bucket(bucket_name)
        bucket.location = 'us'
        bucket.create()

    local_id = "%.20f" % time.time()
    _output_filename = output_filename + str(exec_id) + "_" + str(local_id)
    # str() so that numeric service/repetition/concurrence values do not
    # raise TypeError during concatenation.
    _output_path = (str(_service) + "/repetition_" + str(_repetition) +
                    "/concurrence_" + str(_concurrence) + "/")
    local_path = tempPath + _output_filename
    blob_name = _output_path + _output_filename + ".json"

    def _publish(record):
        # Serialize `record` to the temp file, then upload it as `blob_name`.
        # The context manager closes the handle even if serialization fails.
        with open(local_path, "w") as fh:
            fh.write(json.dumps(record))
        bucket.blob(blob_name).upload_from_filename(local_path)

    _publish({
        "id": exec_id,
        "service": _service,
        "started_at": str(started_at),
        "s1": {"content": _s1, "title": _t1, "length": len(s1)},
        "s2": {"content": _s2, "title": _t2, "length": len(s2)},
        "algorithm": algorithm
    })

    h = Hirschberg()
    align_s1, align_s2 = h.align(s1, s2)
    score = h.score(align_s1, align_s2)

    finished_at = datetime.datetime.now()
    duration = str(finished_at - started_at)

    # Same blob name as the first upload: the final record replaces it.
    _publish({
        "id": exec_id,
        "service": _service,
        "started_at": str(started_at),
        "finished_at": str(finished_at),
        "s1": {"content": _s1, "title": _t1, "length": len(s1),
               "align": align_s1},
        "s2": {"content": _s2, "title": _t2, "length": len(s2),
               "align": align_s2},
        "duration": duration,
        "score": score,
        "algorithm": algorithm
    })

    return {
        'statusCode': 200,
        'body': json.dumps({"result": "done"})
    }
Ejemplo n.º 4
0
        # NOTE(review): this is the interior of a loop whose header is not
        # visible here; `reader`, `writer`, `pmid` and `alterText` are bound
        # outside this fragment.
        annotation = reader.parse()

        # Nothing stored under 'T': write the annotation out unchanged and
        # move on to the next iteration.
        if len(annotation['T']) == 0:
            writer.write('output', pmid + '.ann', annotation)
            continue

        # Read the reference ("gold") text for this pmid from
        # output/<pmid>.txt as UTF-8, with surrounding whitespace stripped.
        gold = os.path.join('output', pmid + '.txt')
        goldFile = codecs.open(gold, 'r', 'utf-8')
        goldText = goldFile.read().strip()
        goldFile.close()

        entities = annotation['T']

        # Align the two texts using whatever units get_phrase returns.
        goldPhrases = get_phrase(goldText)
        alterPhrases = get_phrase(alterText)
        h = Hirschberg(goldPhrases, alterPhrases)
        # Disabled variant: align the raw texts character by character.
        #h = Hirschberg(list(goldText),list(alterText))
        alignGold, alignAlter = h.align()
        #print ''.join(alignGold)
        #print ''.join(alignAlter)
        # presumably maps an offset in the alter text to the corresponding
        # offset in the gold text -- TODO confirm against map_alignment
        alter2gold = h.map_alignment(''.join(alignGold), ''.join(alignAlter))

        # Rewrite every entity's start/end offsets through the mapping.
        # NOTE(review): iteritems() is the Python 2 dict API.
        for k, e in entities.iteritems():
            start = int(e.start)
            end = int(e.end)

            e.start = alter2gold[start]
            # When the mapped positions of `end - 1` and `end` are more than
            # one apart, end the entity one past the mapped position of
            # `end - 1` instead of jumping across the gap.
            if alter2gold[end] - alter2gold[end - 1] > 1:
                e.end = alter2gold[end - 1] + 1
            else:
                e.end = alter2gold[end]
Ejemplo n.º 5
0
def align(event):
    """Align two sequences with Hirschberg and upload the run record as a blob.

    A first record (inputs and start time) is uploaded before the alignment
    runs. After the alignment finishes, the same blob is overwritten with
    the aligned sequences, score, finish time and duration.

    Args:
        event: Mapping providing the keys ``s1``, ``s2``, ``t1``, ``t2``,
            ``service``, ``concurrence``, ``repetition``, ``execid``,
            ``container`` and ``storageConnection``.

    Returns:
        dict: ``{'statusCode': 200, 'body': '{"result": "done"}'}``.

    Raises:
        KeyError: If ``event`` lacks a required key.
    """
    tempPath = "/tmp/"
    _s1 = event["s1"]
    _s2 = event["s2"]
    _t1 = event["t1"]
    _t2 = event["t2"]
    _service = event["service"]
    _concurrence = event["concurrence"]
    _repetition = event["repetition"]
    exec_id = event["execid"]
    s1 = list(_s1)
    s2 = list(_s2)
    # The clock starts here, so the reported duration also covers client
    # setup and the first upload, not only the alignment itself.
    started_at = datetime.datetime.now()
    container_name = event["container"]
    storage_account_string_connection = event["storageConnection"]
    blob_service_client = BlobServiceClient.from_connection_string(
        storage_account_string_connection)

    # Create the target container on first use.
    container_client = blob_service_client.get_container_client(container_name)
    if not container_client.exists():
        blob_service_client.create_container(container_name)

    local_id = "%.20f" % time.time()
    # str() so that numeric service/repetition/concurrence values do not
    # raise TypeError during concatenation.
    _output_path = (str(_service) + "/repetition_" + str(_repetition) +
                    "/concurrence_" + str(_concurrence) + "/")
    _output_filename = output_filename + str(exec_id) + "_" + str(
        local_id) + ".json"

    # One local path is used for both writing and reading the temp file.
    upload_file_path = os.path.join(tempPath, _output_filename)
    blob_client = blob_service_client.get_blob_client(
        container=container_name, blob=_output_path + _output_filename)

    def _write_record(record):
        # Serialize `record` to the temp file; the context manager closes
        # the handle even if serialization fails.
        with open(upload_file_path, "w") as fh:
            fh.write(json.dumps(record))

    _write_record({
        "id": exec_id,
        "service": _service,
        "started_at": str(started_at),
        "s1": {
            "content": _s1,
            "title": _t1,
            "length": len(s1)
        },
        "s2": {
            "content": _s2,
            "title": _t2,
            "length": len(s2)
        },
        "algorithm": algorithm
    })
    # First upload: no overwrite flag, as in the original call.
    with open(upload_file_path, "rb") as data:
        blob_client.upload_blob(data)

    h = Hirschberg()
    align_s1, align_s2 = h.align(s1, s2)
    score = h.score(align_s1, align_s2)

    finished_at = datetime.datetime.now()
    duration = str(finished_at - started_at)

    _write_record({
        "id": exec_id,
        "service": _service,
        "started_at": str(started_at),
        "finished_at": str(finished_at),
        "s1": {
            "content": _s1,
            "title": _t1,
            "length": len(s1),
            "align": align_s1
        },
        "s2": {
            "content": _s2,
            "title": _t2,
            "length": len(s2),
            "align": align_s2
        },
        "duration": duration,
        "score": score,
        "algorithm": algorithm
    })
    # Second upload replaces the initial record, hence overwrite=True.
    with open(upload_file_path, "rb") as data:
        blob_client.upload_blob(data, overwrite=True)

    return {'statusCode': 200, 'body': json.dumps({"result": "done"})}
Ejemplo n.º 6
0
def align(event, context, testType, bucket):
    """Align two sequences with Hirschberg, recording metrics, and upload to S3.

    The request body is a base64-encoded JSON document. A first record
    (inputs, start time, metrics sampled before the run) is uploaded before
    the alignment runs. Afterwards the same object key is uploaded again
    with the aligned sequences, score, timings and the before/after/variance
    metrics.

    Args:
        event: Mapping whose ``"body"`` entry is a base64 string; the decoded
            JSON provides the keys ``s1``, ``s2``, ``t1``, ``t2`` and ``id``.
        context: Not used by this function.
        testType: Value stored under ``"type"`` in the uploaded records.
        bucket: Name of the S3 bucket passed to ``s3_client.upload_file``.

    Returns:
        dict: ``{'statusCode': 200, 'body': '{"result": "done"}'}``.

    Raises:
        KeyError: If ``event`` or the decoded body lacks a required key.
    """
    # Sampled first so the "before" metrics precede all work in this call.
    metrics_before = getMetrics()
    base64_bytes = event["body"].encode('ascii')
    message_bytes = base64.b64decode(base64_bytes)
    # UTF-8 rather than ASCII: JSON text may carry non-ASCII characters, and
    # every ASCII payload decodes identically under UTF-8.
    message = message_bytes.decode('utf-8')
    dt = json.loads(message)
    _s1 = dt["s1"]
    _s2 = dt["s2"]
    _t1 = dt["t1"]
    _t2 = dt["t2"]
    _type = testType
    exec_id = dt["id"]
    s1 = list(_s1)
    s2 = list(_s2)
    started_at = datetime.datetime.now()

    _output_filename = output_filename + str(exec_id)
    local_path = "/tmp/" + _output_filename
    object_key = _output_filename + ".json"

    def _publish(record):
        # Serialize `record` to the temp file, then upload it to the bucket.
        # The context manager closes the handle even if serialization fails.
        with open(local_path, "w") as fh:
            fh.write(json.dumps(record))
        s3_client.upload_file(local_path, bucket, object_key)

    _publish({
        "id": exec_id,
        "type": _type,
        "metrics": {
            "before": metrics_before
        },
        "started_at": str(started_at),
        "s1": {
            "content": _s1,
            "title": _t1,
            "length": len(s1)
        },
        "s2": {
            "content": _s2,
            "title": _t2,
            "length": len(s2)
        },
        "algorithm": algorithm
    })

    h = Hirschberg()
    align_s1, align_s2 = h.align(s1, s2)
    score = h.score(align_s1, align_s2)

    # Sampled right after the alignment, before the finish time is taken.
    metrics_after = getMetrics()

    finished_at = datetime.datetime.now()
    duration = str(finished_at - started_at)

    # Same key as the first upload: the final record replaces the initial one.
    _publish({
        "id": exec_id,
        "type": _type,
        "metrics": {
            "before": metrics_before,
            "after": metrics_after,
            "variance": calc_metrics_variance(metrics_before, metrics_after)
        },
        "started_at": str(started_at),
        "finished_at": str(finished_at),
        "s1": {
            "content": _s1,
            "title": _t1,
            "length": len(s1),
            "align": align_s1
        },
        "s2": {
            "content": _s2,
            "title": _t2,
            "length": len(s2),
            "align": align_s2
        },
        "duration": duration,
        "score": score,
        "algorithm": algorithm
    })
    return {'statusCode': 200, 'body': json.dumps({"result": "done"})}