Example #1
# Imports and module-level objects assumed by this snippet:
import cv2
import numpy as np
from flask import request, jsonify, make_response
from google.cloud import vision

# `client` (a vision.ImageAnnotatorClient) and `net` (a cv2.dnn-loaded YOLO
# network) are assumed to be created at module level.
def getnodes():
    theheight = int(request.form['height'])
    thewidth = int(request.form['width'])
    val = [int(val) for val in request.form['blob'].split(',')]
    # Rebuild the flat pixel list into `theheight` rows of `thewidth` pixels
    groupedval = [
        np.array(val[i:i + thewidth], dtype=np.uint8)
        for i in range(0, theheight * thewidth, thewidth)
    ]
    arr = np.array(groupedval, dtype=np.uint8)
    colorimg = cv2.cvtColor(arr, cv2.COLOR_GRAY2BGR)
    success, encoded_image = cv2.imencode('.jpg', colorimg)
    content2 = encoded_image.tobytes()
    image = vision.Image(content=content2)
    response = client.face_detection(image=image)
    faces = response.face_annotations
    theres = []
    for face in faces:
        # The four corners of the face bounding box as (x, y) tuples
        vertices = [(vertex.x, vertex.y)
                    for vertex in face.bounding_poly.vertices]
        theres.append(vertices)
    print(theres)

    blob = cv2.dnn.blobFromImage(colorimg,
                                 1 / 255, (416, 416), (0, 0, 0),
                                 swapRB=True,
                                 crop=False)
    net.setInput(blob)  # Feed the preprocessed blob to the network
    output_layers_names = net.getUnconnectedOutLayersNames()
    # Run a forward pass and collect the outputs of the output layers
    layerOutputs = net.forward(output_layers_names)

    boxes = []
    confidences = []
    class_ids = []

    for output in layerOutputs:
        for detection in output:  # detection: [cx, cy, w, h, objectness, class scores...]
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.6 and class_id == 0:
                # YOLO outputs normalized centre coordinates plus width and
                # height; convert to a top-left corner box for cv2.dnn.NMSBoxes
                w = int(detection[2] * thewidth)
                h = int(detection[3] * theheight)
                x = int(detection[0] * thewidth - w / 2)
                y = int(detection[1] * theheight - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    index = cv2.dnn.NMSBoxes(boxes, confidences, 0.6, 0.4)
    ret = []
    if len(index) > 0:
        for i in index.flatten():
            x, y, w, h = boxes[i]
            ret.append([x, y])
    everything = [ret, theres]

    return make_response(jsonify(everything), 200)
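
# A hypothetical client call for the route above (the URL, port, and sample
# payload are assumptions; the form-field names come from the snippet):
#
#   import requests
#   payload = {'height': 2, 'width': 2, 'blob': '0,255,255,0'}
#   requests.post('http://localhost:5000/getnodes', data=payload)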

# Fragment of a class __init__ from the same source:
def __init__(self):
    self.client = vision.ImageAnnotatorClient()
    self.image = vision.Image()
Example #3
def hello_world(request):

    from google.cloud import vision
    from datetime import datetime
    import re
    import itertools
    import write2bq
    import os
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"C:\gcp_credentials\elaborate-howl-285701-105c2e8355a8.json"
    table_id = 'elaborate-howl-285701.context.image_web_entities'  # destination table name

    now = str(datetime.now())  # timestamp for this run

    print("now=" + now)

    client = vision.ImageAnnotatorClient()
    request_json = request.get_json()
    image = vision.Image()
    if request_json:
        source_url = request_json['source_url']
        print("source_url=" + source_url)

    match = re.match(r'gs://([^/]+)/(.+)', source_url)
    bucket_name = match.group(1)  # bucket name
    print(bucket_name)
    prefix = match.group(2)  # object prefix
    print(prefix)

    file_name = prefix
    exact_file_name_list = re.split("/", file_name)
    exact_file_name = exact_file_name_list[-1]

    uri = "gs://" + bucket_name + "/" + file_name
    print("uri=" + uri)

    image.source.image_uri = uri

    response = client.web_detection(image=image)
    # Collect URLs of fully matching images (only the URL strings are kept,
    # since the raw protobuf objects do not serialize cleanly to JSON)
    matching_images_lst = []
    matching_images = response.web_detection.full_matching_images
    for matching_image in matching_images:
        matching_images_lst.append(matching_image.url)

    # Collect URLs of pages that contain matching images
    page_lst = []
    for page in response.web_detection.pages_with_matching_images:
        page_lst.append(page.url)

    # Collect the best-guess labels
    best_match_lst = []
    for best_match in response.web_detection.best_guess_labels:
        best_match_lst.append(best_match.label)

    for (a, b, c) in itertools.zip_longest(matching_images_lst, page_lst, best_match_lst):
        documentEntities = {
            "time_stamp": now,
            "file_name": exact_file_name,
            "matching_images": a,
            "pages_with_images": b,
            "best_guess": c,
            "input_uri": uri
        }
        write2bq.BQ(documentEntities, table_id)
    
    return "success"
Example #4
# Imports assumed by this snippet:
import io
import math

import pytesseract
from pytesseract import Output
from google.cloud import vision


def extract_words(img, height, width, ocr_engine='pytesseract'):
    if ocr_engine == 'pytesseract':
        data = pytesseract.image_to_data(img, output_type=Output.DICT)
        n_boxes = len(data['text'])
        words = [
            {
                'text': data['text'][i],
                'left': data['left'][i],
                'top': data['top'][i],
                'right': data['left'][i] + data['width'][i],
                'bottom': data['top'][i] + data['height'][i]
            }
            for i in range(n_boxes) if data['text'][i]
        ]
        return words

    
    elif ocr_engine == 'google_ocr':
        # Convert the PIL image to PNG bytes for the Vision API
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='PNG')
        content = img_byte_arr.getvalue()

        client = vision.ImageAnnotatorClient()
        image_ = vision.Image(content=content)
        response = client.text_detection(image=image_)
        texts = response.text_annotations

        # texts[0] is the full-page annotation, so keep only the word entries
        words = []
        for text in texts[1:]:
            data = {'text': text.description}
            x_vert = [vertex.x for vertex in text.bounding_poly.vertices]
            y_vert = [vertex.y for vertex in text.bounding_poly.vertices]
            data['left'] = min(x_vert)
            data['right'] = max(x_vert)
            data['top'] = min(y_vert)
            data['bottom'] = max(y_vert)
            words.append(data)
        return words
    
    
    elif ocr_engine == 'aws_textract':

        import boto3

        # use aws textract
        client = boto3.client('textract')

        # convert PpmImageFile to byte
        img_byte_arr = io.BytesIO()
        img.save(img_byte_arr, format='PNG')
        img_byte_arr = img_byte_arr.getvalue()

        # call aws-textract API
        response = client.detect_document_text(Document={'Bytes': img_byte_arr})

        # use the image width and height to convert normalized coordinates from the response
        words = [
            {
                'text': data['Text'],
                'left': math.floor((data['Geometry']['BoundingBox']['Left']) * width),
                'top': math.floor((data['Geometry']['BoundingBox']['Top']) * height),
                'right': math.ceil(
                    (data['Geometry']['BoundingBox']['Left'] + data['Geometry']['BoundingBox']['Width']) * width),
                'bottom': math.ceil(
                    (data['Geometry']['BoundingBox']['Top'] + data['Geometry']['BoundingBox']['Height']) * height)
            } for data in response['Blocks'] if "Text" in data
        ]
        return words
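
# Example usage (hypothetical file name; Pillow is an assumption, matching the
# img.save(...) calls above):
#
#   from PIL import Image
#   img = Image.open('scan.png')
#   words = extract_words(img, img.height, img.width, ocr_engine='google_ocr')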
Example #5
def ocr_url(url):
    image = vision.Image()
    image.source.image_uri = url

    return vision_client.document_text_detection(image=image)
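
# Example usage (hypothetical URI; `vision_client` is assumed to be a
# module-level vision.ImageAnnotatorClient):
#
#   response = ocr_url('gs://my-bucket/receipt.png')
#   print(response.full_text_annotation.text)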
Example #6
def ProcessMaerskInvoice(ImageList):
    keywordlist = [
        'no.:', 'maersk', 'from:', 'to:', 'description:', 'quantity',
        'itinerary', "size", 'sub.', 'collapsible', 'gross', 'equip', 'pack.',
        'weight', 'volume', 'qty/kind', 'type', 'release', 'vessel', 'voy',
        'etd', 'eta'
    ]
    ############ Preprocess Image ###########

    # Binarize each page in place: near-black pixels to 0, everything else to 255
    for image in ImageList:
        currentImage = cv2.imread(image)
        currentImage[currentImage < 10] = 0
        currentImage[(currentImage != 0) & (currentImage != 255)] = 255
        cv2.imwrite(image, currentImage)
    ############ Invoke the Vision API for the first two pages ############
    try:
        for count, image in enumerate(ImageList):
            if count < 2:
                currentfile = ImageList[count]
                with io.open(currentfile, 'rb') as gen_image_file:
                    content = gen_image_file.read()
                client = vision.ImageAnnotatorClient()
                image = vision.Image(content=content)
                response = client.text_detection(image=image)
                DictResponse = MessageToDict(response._pb)
                if count == 0:
                    FirstPageDictResponse = DictResponse
                else:
                    SecondPageDictResponse = DictResponse
    except Exception:
        return "invocation error"
    ############# Check for Keywords ##################
    WholeContentDescription = FirstPageDictResponse['textAnnotations'][0][
        'description'].lower() + " " + SecondPageDictResponse[
            'textAnnotations'][0]['description'].lower()
    match = 0
    for keyword in keywordlist:
        if keyword in WholeContentDescription:
            match = match + 1
        else:
            print(keyword)
    if match != len(keywordlist):
        return "missing keywords"
    ############# create Dataframes #########################
    WordsAndCoordinatesPage1 = FirstPageDictResponse['textAnnotations'][1:]
    WordsAndCoordinatesPage2 = SecondPageDictResponse['textAnnotations'][1:]
    WordsAndCoordinates = [WordsAndCoordinatesPage1, WordsAndCoordinatesPage2]
    for num in range(0, len(WordsAndCoordinates)):
        currentWordandCoordinate = WordsAndCoordinates[num]
        word_list = []
        llx_list = []
        lly_list = []
        lrx_list = []
        lry_list = []
        urx_list = []
        ury_list = []
        ulx_list = []
        uly_list = []
        for i in range(0, len(currentWordandCoordinate)):
            word_list.append(currentWordandCoordinate[i]['description'])
            llx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][0]['x'])
            lly_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][0]['y'])
            lrx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][1]['x'])
            lry_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][1]['y'])
            urx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][2]['x'])
            ury_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][2]['y'])
            ulx_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][3]['x'])
            uly_list.append(currentWordandCoordinate[i]['boundingPoly']
                            ['vertices'][3]['y'])
        ##################### Create Dictionary for the lists #####################
        WordsAndCoordinatesDict = {
            "Word": word_list,
            'llx': llx_list,
            'lly': lly_list,
            'lrx': lrx_list,
            'lry': lry_list,
            'urx': urx_list,
            'ury': ury_list,
            'ulx': ulx_list,
            'uly': uly_list
        }
        ####################### Create Dataframe ######################
        if num == 0:
            WordsAndCoordinatesDF_Page1 = pd.DataFrame.from_dict(
                WordsAndCoordinatesDict)
        elif num == 1:
            WordsAndCoordinatesDF_Page2 = pd.DataFrame.from_dict(
                WordsAndCoordinatesDict)
    ###################### Get Values ###########################
    try:
        ############## Booking Number ############################
        BookingNumber_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['No.:'
                                                      ])]['uly'].values[0] - 20
        BookingNumber_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['No.:'
                                                      ])]['lly'].values[0] + 20
        BookingNumber_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['No.:'
                                                      ])]['urx'].values[0]
        MaerskSpot_llx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(
                ['Maersk'])].sort_values(by='lly').head(1)['llx'].values[0]
        BookingNumber = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > BookingNumber_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < BookingNumber_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > BookingNumber_urx) &
            (WordsAndCoordinatesDF_Page1['urx'] < MaerskSpot_llx)]
        BookingNumber = " ".join(BookingNumber['Word'].values).strip()
        print(BookingNumber)
        ############## From #############################
        From_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['From:'
                                                      ])]['uly'].values[0] - 30
        From_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['From:'
                                                      ])]['lly'].values[0] + 30
        From_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['From:'
                                                      ])]['urx'].values[0]
        From = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > From_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < From_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > From_urx)]
        From = " ".join(From['Word'].values).strip()
        print(From)
        ################# To #############################
        To_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['To:'
                                                      ])]['uly'].values[0] - 20
        To_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['To:'
                                                      ])]['lly'].values[0] + 20
        To_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['To:'])]['urx'].values[0]
        To = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > To_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < To_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > To_urx)]
        To = " ".join(To['Word'].values).strip()
        print(To)
        ############# Commodity Description ###################
        Description_uly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Description:'
                                                      ])]['uly'].values[0] - 20
        Description_lly = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Description:'
                                                      ])]['lly'].values[0] + 20
        Description_urx = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Description:'
                                                      ])]['urx'].values[0]
        Commodity = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['uly'] > Description_uly)
            & (WordsAndCoordinatesDF_Page1['lly'] < Description_lly) &
            (WordsAndCoordinatesDF_Page1['ulx'] > Description_urx)]
        CommodityDescription = " ".join(Commodity['Word'].values).strip()
        ################ Quantity #########################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Size_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Size/Type/Height'
                                                      ])]['llx'].values[0]
        Quantity = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Size_LLX)]
        Quantity = " ".join(Quantity['Word'].values).strip()
        print(Quantity)
        ################ Size ##############################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Quantity_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lrx'].values[0]
        Sub_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Sub.'
                                                      ])]['llx'].values[0]
        Size = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Quantity_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Sub_LLX)].sort_values(
                by=['llx'])
        Size = " ".join(Size['Word'].values).strip()
        print(Size)
        ################ Sub Equipment #####################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Collapsible_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Collapsible'
                                                      ])]['lrx'].values[0]
        Gross_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Gross'
                                                      ])]['llx'].values[0]
        SubEquipment = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Collapsible_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Gross_LLX)]
        SubEquipment = " ".join(SubEquipment['Word'].values).strip()
        ############### Gross Weight #####################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Equip_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Equip'
                                                      ])]['lrx'].values[0]
        Pack_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Pack.'
                                                      ])]['llx'].values[0]
        GrossWeight = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Equip_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Pack_LLX)]
        GrossWeight = " ".join(GrossWeight['Word'].values).strip()
        ############### Pack Quantity ######################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Weight_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Weight'
                                                      ])]['lrx'].values[0] + 40
        Cargo_LLX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Volume'
                                                      ])]['llx'].values[0]
        PackQuantity = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Weight_LRX) &
            (WordsAndCoordinatesDF_Page1['lrx'] < Cargo_LLX)]
        PackQuantity = " ".join(PackQuantity['Word'].values).strip()
        ############## Cargo Volume ##########################
        Quantity_LLY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Quantity'
                                                      ])]['lly'].values[0] + 20
        Itinerary_ULY = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Itinerary'
                                                      ])]['uly'].values[0] - 20
        Weight_LRX = WordsAndCoordinatesDF_Page1[
            WordsAndCoordinatesDF_Page1['Word'].isin(['Qty/Kind'
                                                      ])]['lrx'].values[0] + 20
        CargoVolume = WordsAndCoordinatesDF_Page1[
            (WordsAndCoordinatesDF_Page1['lly'] > Quantity_LLY)
            & (WordsAndCoordinatesDF_Page1['uly'] < Itinerary_ULY) &
            (WordsAndCoordinatesDF_Page1['llx'] > Weight_LRX)]
        CargoVolume = " ".join(CargoVolume['Word'].values).strip()
        ######## Load Itinerary Type and Location ###############
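        # NOTE: the lookup strings 'Туре' and 'ЕТА' below contain Cyrillic
        # lookalike characters, which appear to match what the OCR returns for
        # these headers on this invoice; the strings must match that output exactly.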
        Type_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Туре'
                                                      ])]['lly'].values[0]
        MaxLowerLimit = Type_lly + 160
        Type_urx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Туре'
                                                      ])]['urx'].values[0]
        MaxURX = Type_urx + 160
        LoadItineraryType = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Type_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < MaxLowerLimit) &
            (WordsAndCoordinatesDF_Page2['lrx'] < MaxURX)]
        LoadItineraryType = " ".join(LoadItineraryType['Word'].values)
        Location_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Location'
                                                      ])]['lly'].values[0]
        MaxLowerLimit = Location_lly + 160
        Type_urx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Туре'
                                                      ])]['urx'].values[0]
        MaxURX = Type_urx + 160
        Release_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Release'
                                                      ])]['ulx'].values[0]
        LoadItineraryLocation = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Location_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < MaxLowerLimit) &
            (WordsAndCoordinatesDF_Page2['lrx'] < Release_llx) &
            (WordsAndCoordinatesDF_Page2['llx'] > MaxURX)]
        LoadItineraryLocation = " ".join(LoadItineraryLocation['Word'].values)
        ############### TransportPlanVessel ##########################
        Vessel_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Vessel'
                                                      ])]['lly'].values[0] + 20
        max_lly = Vessel_lly + 80
        Vessel_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Vessel'
                                                      ])]['llx'].values[0]
        Voy_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Voy'])]['llx'].values[0]
        TransportPlanVessel = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Vessel_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= Vessel_llx) &
            (WordsAndCoordinatesDF_Page2['lrx'] < Voy_llx)]
        TransportPlanVessel = " ".join(TransportPlanVessel['Word'].values)
        ############ TransportVoyNumber #############################
        Voy_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Voy'
                                                      ])]['lly'].values[0] + 20
        max_lly = Voy_lly + 80
        Voy_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['Voy'])]['llx'].values[0]
        ETD_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ETD'])]['llx'].values[0]
        TransportVoyNumber = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > Voy_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= Voy_llx) &
            (WordsAndCoordinatesDF_Page2['lrx'] < ETD_llx)]
        TransportVoyNumber = " ".join(TransportVoyNumber['Word'].values)
        ############## TransportPlanETD ###############################
        ETD_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ETD'])]['lly'].values[0]
        max_lly = Voy_lly + 80
        ETD_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ETD'
                                                      ])]['llx'].values[0] - 20
        ETA_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ЕТА'])]['llx'].values[0]
        TransportPlanETD = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > ETD_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= ETD_llx) &
            (WordsAndCoordinatesDF_Page2['lrx'] < ETA_llx)]
        TransportPlanETD = " ".join(TransportPlanETD['Word'].values)
        ################## TransportPlanETA #############################
        ETA_lly = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ЕТА'])]['lly'].values[0]
        max_lly = ETA_lly + 80
        ETA_llx = WordsAndCoordinatesDF_Page2[
            WordsAndCoordinatesDF_Page2['Word'].isin(['ЕТА'
                                                      ])]['llx'].values[0] - 20
        TransportPlanETA = WordsAndCoordinatesDF_Page2[
            (WordsAndCoordinatesDF_Page2['lly'] > ETA_lly)
            & (WordsAndCoordinatesDF_Page2['lly'] < max_lly) &
            (WordsAndCoordinatesDF_Page2['llx'] >= ETA_llx)]
        TransportPlanETA = " ".join(TransportPlanETA['Word'].values)
        print(TransportPlanETA)
        return dict(msg="Success",
                    BookingNumber=BookingNumber,
                    From=From,
                    To=To,
                    CommodityDescription=CommodityDescription,
                    Quantity=Quantity,
                    Size=Size,
                    SubEquipment=SubEquipment,
                    GrossWeight=GrossWeight,
                    PackQuantity=PackQuantity,
                    CargoVolume=CargoVolume,
                    LoadItineraryType=LoadItineraryType,
                    LoadItineraryLocation=LoadItineraryLocation,
                    TransportPlanVessel=TransportPlanVessel,
                    TransportVoyNumber=TransportVoyNumber,
                    TransportPlanETD=TransportPlanETD,
                    TransportPlanETA=TransportPlanETA)
    except Exception:
        return "unable to extract data from Google Vision API."
def detect_infile(path):
    # `client` (a vision.ImageAnnotatorClient), `img_width`, and `img_height`
    # are assumed to be defined at module level.
    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    objects = client.object_localization(
        image=image).localized_object_annotations
    # print(objects)
    response = client.face_detection(image=image)
    faces = response.face_annotations

    # Likelihood names from google.cloud.vision enums; the enum values already
    # map to integers 0..5, which are used directly as scores below.
    likelihood_name = ('UNKNOWN', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE',
                       'LIKELY', 'VERY_LIKELY')
    likelihood_num = (0, 1, 2, 3, 4, 5)

    faces_info = {}
    i = 0
    # Face detection
    for face in faces:
        faces_info[i] = {}
        faces_info[i]['anger'] = likelihood_num[face.anger_likelihood]
        faces_info[i]['joy'] = likelihood_num[face.joy_likelihood]
        faces_info[i]['sorrow'] = likelihood_num[face.sorrow_likelihood]
        faces_info[i]['surprise'] = likelihood_num[face.surprise_likelihood]
        # Sort the emotions by likelihood, strongest first
        all_emotions = {'anger': likelihood_num[face.anger_likelihood],
                        'joy': likelihood_num[face.joy_likelihood],
                        'sorrow': likelihood_num[face.sorrow_likelihood],
                        'surprise': likelihood_num[face.surprise_likelihood]}
        all_emotions = dict(sorted(all_emotions.items(), key=lambda item: item[1], reverse=True))
        first_value = next(iter(all_emotions.values()))

        verticesX = [vertex.x for vertex in face.bounding_poly.vertices]
        verticesY = [vertex.y for vertex in face.bounding_poly.vertices]
        # If even the strongest emotion is only VERY_UNLIKELY (1), treat the
        # face as neutral
        if first_value == 1:
            faces_info[i]['current_emo'] = 'default'
        else:
            faces_info[i]['current_emo'] = list(all_emotions.keys())[0]

        # Top-left and bottom-right corners of the face bounding box
        faces_info[i]['vertices'] = [verticesX[0], verticesY[0], verticesX[2], verticesY[2]]
        i += 1

    objects_dict = {}
    j = 0
    #object detection
    for object_info in objects:
        if object_info.name != "Person":
            verticesX = [vertex.x for vertex in object_info.bounding_poly.normalized_vertices]
            verticesY = [vertex.y for vertex in object_info.bounding_poly.normalized_vertices]

            # Scale the normalized vertices back to pixel coordinates
            objects_dict[j] = {
                "name": object_info.name,
                'vertices': [int(verticesX[0] * img_width),
                             int(verticesY[0] * img_height),
                             int(verticesX[2] * img_width),
                             int(verticesY[2] * img_height)]
            }
            j += 1

    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    return {'face_info': {'current_emo': faces_info[0]['current_emo'],
                          'vertices': faces_info[0]['vertices']},
            'objects': objects_dict}
Example #8
def detect_web(path):
    """Detects web annotations given an image."""
    from google.cloud import vision
    import io
    client = vision.ImageAnnotatorClient()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    response = client.web_detection(image=image)
    annotations = response.web_detection
    best_guess_label = ""
    to_display = ""
    if annotations.best_guess_labels:
        for label in annotations.best_guess_labels:
            to_display = to_display + '\nBest guess label: {}'.format(
                label.label) + '\n'
            best_guess_label = label.label

    if annotations.pages_with_matching_images:
        to_display = to_display + '\n{} Pages with matching images found:'.format(
            len(annotations.pages_with_matching_images)) + '\n'

        for page in annotations.pages_with_matching_images:
            to_display = to_display + '\n\tPage url   : {}'.format(
                page.url) + '\n'

            if page.full_matching_images:
                to_display = to_display + '\t{} Full Matches found: '.format(
                    len(page.full_matching_images)) + '\n'

                for image in page.full_matching_images:
                    to_display = to_display + str(image) + '\n'
                    to_display = to_display + '\t\tImage url  : {}'.format(
                        image.url) + '\n'

            if page.partial_matching_images:
                to_display = to_display + '\t{} Partial Matches found: '.format(
                    len(page.partial_matching_images)) + '\n'

                for image in page.partial_matching_images:
                    to_display = to_display + '\t\tImage url  : {}'.format(
                        image.url) + '\n'

    if annotations.web_entities:
        to_display = to_display + '\n{} Web entities found: '.format(
            len(annotations.web_entities)) + '\n'

        for entity in annotations.web_entities:
            to_display = to_display + '\n\tScore      : {}'.format(
                entity.score) + '\n'
            to_display = to_display + u'\tDescription: {}'.format(
                entity.description) + '\n'

    if annotations.visually_similar_images:
        to_display = to_display + '\n{} visually similar images found:\n'.format(
            len(annotations.visually_similar_images)) + '\n'

        for image in annotations.visually_similar_images:
            to_display = to_display + '\tImage url    : {}'.format(
                image.url) + '\n'

    if response.error.message:
        raise Exception('{}\nFor more info on error messages, check: '
                        'https://cloud.google.com/apis/design/errors'.format(
                            response.error.message))
    return to_display
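
# Example usage (hypothetical path):
#
#   print(detect_web('local_image.jpg'))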
Example #9
def predict():
    # Get the data from the POST request.
    data = request.get_json(force=True)

    output = {"status": "false"}

    print("--------------------------------------------------")
    try:
        # The request carries a base64 data URL; strip the prefix and decode.
        # `client` is assumed to be a module-level vision.ImageAnnotatorClient.
        img_str = data['image']
        imgdata = base64.b64decode(img_str.split(",")[1])

        csv_line = []
        image = vision.Image(content=imgdata)

        response = client.text_detection(image=image)
        texts = response.text_annotations

        text_words = [text.description for text in texts]
        # texts[0] is the full combined text (with newlines); join the rest
        text_combined = " ".join(text_words[1:])
        csv_line.append("TXT-" + text_combined)

        if response.error.message:
            raise Exception(
                '{}\nFor more info on error messages, check: '
                'https://cloud.google.com/apis/design/errors'.format(
                    response.error.message))

        objects = client.object_localization(
            image=image).localized_object_annotations

        item_desc = []
        for object_ in objects:
            csv_line.append("OBJ-" + object_.name + " (" +
                            "{:.2f}".format(object_.score) + ")")
            for obj_name in object_.name.split(" "):
                item_desc.append((obj_name.lower(), object_.score))

        # Performs label detection on the image file
        response = client.label_detection(image=image)
        labels = response.label_annotations

        for label in labels:
            csv_line.append(label.description + " (" +
                            "{:.2f}".format(label.score) + ")")
            for lbl_name in label.description.split(" "):
                item_desc.append((lbl_name.lower(), label.score))

        ## Rank detected items by confidence, then check them against the
        ## restricted-item lookup tables (`keywords`, `broad`, and `category`
        ## are assumed to be module-level dicts)
        item_desc = sorted(item_desc, key=lambda x: x[1], reverse=True)
        notAllowed = False
        result = [
            "NA", "yes", "yes", "You are good to go.", "You are good to go.",
            "https://www.tsa.gov/travel/security-screening/whatcanibring/all?combine=&page=2"
        ]

        if data['source'] == data['destination']:
            for lbl, _ in item_desc:
                if lbl in ('seeds', 'plant', 'seed', 'plants'):
                    result = [lbl] + category['seed']
                    notAllowed = True
                    print(result)
                    break

        if not notAllowed:
            for lbl, _ in item_desc:
                if lbl in keywords:
                    result = [lbl] + category[keywords[lbl]]
                    notAllowed = True
                    print(result)
                    break

        if not notAllowed:
            for lbl, _ in item_desc:
                if lbl in broad:
                    result = [lbl] + category[broad[lbl]]
                    notAllowed = True
                    print(result)
                    break

        # myCsvRow = ",".join(csv_line)
        # myCsvRow += "\n"
        # myCsvRow = ",".join(result) + "," + myCsvRow
        # with open('final.csv','a') as fd:
        #     fd.write(myCsvRow)

        print("DONE")
        isError = False
    except Exception as ex:
        result = [
            "NA", "err", "err", "ERROR", "ERROR",
            "https://www.tsa.gov/travel/security-screening/whatcanibring/all?combine=&page=2"
        ]
        isError = True
        print("ERROR", ex)

    output["debugMatchLabel"] = result[0]
    output["isAllowedCarry"] = result[1]
    output["isAllowedCheckin"] = result[2]
    output["descriptionCabin"] = result[3]
    output["descriptionCheckin"] = result[4]
    output["moreInfoLink"] = result[5]
    output["status"] = str(not isError)

    return jsonify(output)
Example #10
def load_990(url):
    pdf_request = requests.get(url)

    # After downloading PDF, grab a single page with single_file=True
    # Also, DPI is very high, but needs to be to get consistent OCR results
    pages = convert_from_bytes(pdf_request.content, dpi=400, single_file=True)
    page_1 = pages[0]

    # The info we're after is at the top of the report, crop it out
    w, h = page_1.size
    page_1_cropped = page_1.crop(
        (math.ceil(w * .04), math.ceil(h * .07), w - math.ceil(w * .09),
         math.ceil(h * .20)))
    img_byte_arr = image_to_byte_array(page_1_cropped, format='PPM')


    client = vision.ImageAnnotatorClient()

    response = client.document_text_detection(image=vision.Image(
        content=img_byte_arr))

    document = response.full_text_annotation

    # Zero in on the line containing start/end dates for 990 fiscal year and
    # crop again
    needle = None
    for page in document.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                nested_text = list(
                    map(lambda w: list(map(lambda s: s.text, w.symbols)),
                        paragraph.words))
                flattened_text = list(
                    itertools.chain.from_iterable(nested_text))
                text = "".join(flattened_text).lower()

                if "calendaryear" in text:
                    needle = paragraph
                    break

            if needle is not None:
                break

        if needle is not None:
            break

    # Guard against the "calendar year" line not being found at all
    if needle is None:
        raise ValueError("could not locate the 'Calendar year' line on page 1")

    # Compute a crop window around the matched paragraph, with small margins
    v = needle.bounding_box.vertices
    w, h = v[2].x - v[0].x, v[2].y - v[0].y
    left, top = v[0].x - math.ceil(w * .05), v[0].y - math.ceil(h * .2)
    right, bottom = page_1_cropped.width, v[2].y + math.ceil(h * .2)

    page_1_cropped = page_1_cropped.crop((left, top, right, bottom))
    img_byte_arr = image_to_byte_array(page_1_cropped, format='PPM')

    response = client.document_text_detection(image=vision.Image(
        content=img_byte_arr))

    text = response.text_annotations[0].description
    return text
# Get a list of the files in the Cloud Storage Bucket
files = storage_client.bucket(bucket_name).list_blobs()
bucket = storage_client.bucket(bucket_name)

print('Processing image files from GCS. This will take a few minutes..')

# Process files from Cloud Storage and save the result to send to BigQuery
for file in files:
    if file.name.endswith('jpg') or file.name.endswith('png'):
        file_content = file.download_as_string()

        # Create a Vision API image object called image_object
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/types.html#google.cloud.vision_v1.types.Image
        image_object = vision.Image()
        image_object.content = file_content

        # Detect text in the image and save the response data into an object called response
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/api.html#google.cloud.vision_v1.ImageAnnotatorClient.document_text_detection
        response = vision_client.text_detection(image=image_object)

        # Save the text content found by the vision API into a variable called text_data
        text_data = response.text_annotations[0].description
        print(text_data)

        # Write the detected text to <filename>.txt in the same bucket
        file_name = file.name.split('.')[0] + '.txt'
        blob = bucket.blob(file_name)
        # Upload the contents of the text_data string variable to the Cloud Storage file
        blob.upload_from_string(text_data, content_type='text/plain')

        # Extract the description and locale data from the response
        # into variables called desc and locale
        desc = response.text_annotations[0].description
        locale = response.text_annotations[0].locale
Example #12
def upload():
    uploaded_file = request.files.get('file')
    name = request.form.get('name')
    loc = request.form.get('loc')
    date = request.form.get('date')
    id = request.form.get('id')
    file_changed = request.form.get('fileChanged')
    old_url = request.form.get('file')
    img_name = request.form.get('img_name')
    edit = request.form.get('edit')

    # Instantiates a client
    vision_client = vision.ImageAnnotatorClient()

    if (file_changed == 'true'):
        # Create a Cloud Storage client.
        gcs = storage.Client()

        if (edit == 'true'):
            delete_bucket = gcs.bucket(CLOUD_STORAGE_BUCKET)
            del_blob = delete_bucket.blob(img_name)
            del_blob.delete()

        # Get the bucket that the file will be uploaded to.
        bucket = gcs.get_bucket(CLOUD_STORAGE_BUCKET)

        # Create a new blob and upload the file's content.
        blob = bucket.blob(uploaded_file.filename)

        blob.upload_from_string(
            uploaded_file.read(),
            content_type=uploaded_file.content_type
        )

        # Make the blob publicly viewable.
        blob.make_public()
        # print(blob.name)
        url = blob.public_url

        img_name = blob.name

        source_uri = "gs://{}/{}".format(CLOUD_STORAGE_BUCKET, blob.name)
        image = vision.Image(source=vision.ImageSource(gcs_image_uri=source_uri))

        # Performs label detection on the image file
        labels = vision_client.label_detection(image=image).label_annotations

        category = ''
        for label in labels:
            label_text = label.description.lower()
            if label_text == 'human':
                category = 'People'
                break
            elif label_text in ('dog', 'cat', 'mammal'):
                category = 'Animal'
                break
            elif label_text == 'flower':
                category = 'Flower'
                break
            else:
                category = 'Others'
    else:
        url = old_url
        category = request.form.get('label')

    key = datastore_client.key('Photo Book', id)
    entity = datastore.Entity(key=key)
    entity.update({
        'name': name,
        'location': loc,
        'date': date,
        'url': url,
        'category': category,
        'id': id,
        'img_name': img_name
    })

    datastore_client.put(entity)

    return {'response': 'res'}
Example #13
def render_doc_test(file):
    # `client` is assumed to be a module-level vision.ImageAnnotatorClient
    with io.open(file, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(image=image)
    res = response.text_annotations
    xMax = res[0].bounding_poly.vertices[2].x
    yMax = res[0].bounding_poly.vertices[2].y

    varX = 5 * xMax / 100
    varY = 5 * yMax / 100

    # Rebuild the table: the detected words must first be sorted by position
    print("Rebuilding the table")
    data = res[1:]
    bubbleSort(data, varX, varY)

    tab = []
    ligne = []
    for text in data:
        # Skip empty detections
        if text.description == "":
            continue
        # Start of a new row
        if len(ligne) == 0:
            ligne.append(text.description)
            # Remember the last detected word
            lastText = text
        else:
            # Adjacent word: append it to the current cell
            if abs(text.bounding_poly.vertices[0].y -
                   lastText.bounding_poly.vertices[1].y) < varY and abs(
                       text.bounding_poly.vertices[0].x -
                       lastText.bounding_poly.vertices[1].x) < varX:
                ligne[-1] += " " + text.description
                lastText = text
                continue
            # Same row but farther along: start a new cell
            if abs(text.bounding_poly.vertices[0].y -
                   lastText.bounding_poly.vertices[1].y) < varY and abs(
                       text.bounding_poly.vertices[0].x -
                       lastText.bounding_poly.vertices[1].x) > varX:
                ligne.append(text.description)
                lastText = text
                continue
            # Otherwise start a new row
            tab.append(ligne)
            ligne = [text.description]
            lastText = text
    # Keep the final row as well
    if ligne:
        tab.append(ligne)
    for row in tab:
        for item in row:
            print(item, end="   ")
        print("")
    print("\nWriting results to Output.csv")
    import csv

    with open("Output.csv", "w", encoding="utf-8-sig", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(tab)
Example #14
# The opening of this function is missing in the original snippet; the header
# and the min/max Y lines below are inferred (the function name is assumed)
def bounding_rect(box):
  minY = min(box, key = lambda k: k.y).y
  maxY = max(box, key = lambda k: k.y).y
  minX = min(box, key = lambda k: k.x).x
  maxX = max(box, key = lambda k: k.x).x
  return ((minX, minY), (maxX, maxY))

def reducer(accum, item):
  print(item)
  accum.append(item)
  return accum


img_file = 'tests/PTDC0005.JPG'
res = None
with io.open(img_file, 'rb') as image_file:
    content = image_file.read()
    client = vision.ImageAnnotatorClient()
    image = vision.Image(content=content)
    print('Detecting text...')
    res = client.document_text_detection(image=image)
    print('Done!')

if res.error.message:
  print('Cloud Vision failed:', res.error.message, res.error)

img = cv.imread(img_file)

words = []

pages = res.full_text_annotation.pages

for page in pages:
  for block in page.blocks:
Example #15
def upload_photo():
    photo = request.files["file"]

    # Create a Cloud Storage client.
    storage_client = storage.Client()

    # Get the bucket that the file will be uploaded to.
    bucket = storage_client.get_bucket(CLOUD_STORAGE_BUCKET)

    # Create a new blob and upload the file's content.
    blob = bucket.blob(photo.filename)
    blob.upload_from_string(photo.read(), content_type=photo.content_type)

    # Make the blob publicly viewable.
    blob.make_public()

    # Create a Cloud Vision client.
    vision_client = vision.ImageAnnotatorClient()

    # Use the Cloud Vision client to detect a face for our image.
    source_uri = "gs://{}/{}".format(CLOUD_STORAGE_BUCKET, blob.name)
    image = vision.Image(source=vision.ImageSource(gcs_image_uri=source_uri))
    faces = vision_client.face_detection(image=image).face_annotations

    # If a face is detected, save to Datastore the likelihood that the face
    # displays 'joy,' as determined by Google's Machine Learning algorithm.
    if len(faces) > 0:
        face = faces[0]

        # Convert the likelihood string.
        likelihoods = [
            "Unknown",
            "Very Unlikely",
            "Unlikely",
            "Possible",
            "Likely",
            "Very Likely",
        ]
        face_joy = likelihoods[face.joy_likelihood]
    else:
        face_joy = "Unknown"

    # Create a Cloud Datastore client.
    datastore_client = datastore.Client()

    # Fetch the current date / time.
    current_datetime = datetime.now()

    # The kind for the new entity.
    kind = "Faces"

    # The name/ID for the new entity.
    name = blob.name

    # Create the Cloud Datastore key for the new entity.
    key = datastore_client.key(kind, name)

    # Construct the new entity using the key. Set dictionary values for entity
    # keys blob_name, storage_public_url, timestamp, and joy.
    entity = datastore.Entity(key)
    entity["blob_name"] = blob.name
    entity["image_public_url"] = blob.public_url
    entity["timestamp"] = current_datetime
    entity["joy"] = face_joy

    # Save the new entity to Datastore.
    datastore_client.put(entity)

    # Redirect to the home page.
    return redirect("/")
Example #16
## The Google Vision API is used in sections 6c and 6d below.
## The environment variable "GOOGLE_APPLICATION_CREDENTIALS" must also point to a JSON key file provided by
## Google. See https://cloud.google.com/docs/authentication/getting-started for more details.

## Explicitly use service account credentials by specifying the private key file.
storage_client = storage.Client.from_service_account_json(
    'google-cloud/service-account.json')

object_annotator = vision.ImageAnnotatorClient()
image = vision.Image()

df['thumbnail_objects'] = 0
df['thumbnail_text_length'] = 0
df['thumbnail_text_content'] = None
max_text_length = 0

for i, url in (enumerate(tqdm(df['thumbnail'],
                              desc='Scanning thumbnails...'))):
    image.source.image_uri = url

    ## 6c. Object Detection

    objects = object_annotator.object_localization(
        image=image).localized_object_annotations
    df.iloc[i, df.columns.get_loc('thumbnail_objects')] = len(objects)
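
    ## 6d. Text Detection
    ## A hedged sketch only: the snippet ends before section 6d, but the
    ## columns initialised above suggest a per-thumbnail text-detection step
    ## roughly like this (the call and bookkeeping below are assumptions).
    response = object_annotator.text_detection(image=image)
    texts = response.text_annotations
    text_content = texts[0].description if texts else ''
    df.iloc[i, df.columns.get_loc('thumbnail_text_length')] = len(text_content)
    df.iloc[i, df.columns.get_loc('thumbnail_text_content')] = text_content
    max_text_length = max(max_text_length, len(text_content))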
Example #17
# Create an array to store results data to be inserted into the BigQuery table
rows_for_bq = []

# Get a list of the files in the Cloud Storage Bucket
files = storage_client.bucket(bucket_name).list_blobs()
bucket = storage_client.bucket(bucket_name)

print('Processing image files from GCS. This will take a few minutes..')

# Process files from Cloud Storage and save the result to send to BigQuery
for file in files:
    if file.name.endswith('jpg') or file.name.endswith('png'):
        file_content = file.download_as_string()

        # Create a Vision API image object called image_object
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/types.html#google.cloud.vision_v1.types.Image
        image_object = vision.Image(content=file_content)

        # Detect text in the image and save the response data into an object called response
        # Ref: https://googleapis.dev/python/vision/latest/gapic/v1/api.html#google.cloud.vision_v1.ImageAnnotatorClient.document_text_detection
        response = client.text_detection(image=image_object)

        # Save the text content found by the vision API into a variable called text_data
        text_data = response.text_annotations[0].description

        # Save the text detection response data in <filename>.txt to cloud storage
        file_name = file.name.split('.')[0] + '.txt'
        blob = bucket.blob(file_name)
        # Upload the contents of the text_data string variable to the Cloud Storage file
        blob.upload_from_string(text_data, content_type='text/plain')
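
        # Hedged completion: `rows_for_bq` is declared above but never filled
        # in this snippet; a plausible finish stores one row per processed file
        rows_for_bq.append((file.name, text_data))

# Stream the collected rows into BigQuery once every file has been processed.
# The BigQuery client, dataset, and table names below are assumptions.
from google.cloud import bigquery
bq_client = bigquery.Client()
table_ref = bq_client.dataset('image_text_dataset').table('image_text_detail')
table = bq_client.get_table(table_ref)
errors = bq_client.insert_rows(table, rows_for_bq)
if errors:
    print('BigQuery insert errors:', errors)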