def execute(self, params):
        '''
        Parameters:
        1 page content
        2 page language
        3 (optional) JSON-encoded collection of xpaths of candidate nodes

        Returns:
        "true" if one of the given nodes is classified as a button,
        otherwise the certainty and the node path produced by a full
        classification of the page
        '''
        page_dict = {"content": params[0], "lang": params[1]}
        xpaths_json = json.loads(params[2]) if len(params) > 2 else []

        classifier = AddToBasketButtonClassifier()

        # any() short-circuits, so nodes after the first match are not checked
        given_node_is_button = any(
            classifier.classifygivennode(page_dict, xpath) == True
            for xpath in xpaths_json)
        if given_node_is_button:
            return translateReturnValues(["true"])

        # none of the given nodes looked like a button: classify the whole page
        classifier.classify(page_dict, None)
        return translateReturnValues(
            [classifier.getCertainty(), classifier.getNodePath()])
 def execute(self,params):
     '''
     Parameters:
     1 fully qualified domain name ( my.example.com )

     Returns:
     "true" if the checker reports the domain as a news domain,
     "false" otherwise
     '''
     fqdn = params[0]

     is_news_domain = GoogleNewsFeedsChecker().isnewsdomain(fqdn)

     # the result is reported as a string flag, not as a boolean
     flag = "true" if is_news_domain == True else "false"
     return translateReturnValues([flag])
Beispiel #3
0
 def execute(self,params):
     '''
     Parameters:
     1 page content
     2 page language

     Returns:
     "true" if the classifier certainty equals 1, "false" otherwise
     feature values of the classifier
     node path of the add-to-basket button (" " if there is none)
     node path of the bag link (" " if there is none)
     '''
     page_dict = {"content":params[0],"lang":params[1]}

     classifier = ProductOfferingPageClassifier()
     classifier.classify(page_dict, None)

     features = classifier.getFeaturesVals()
     is_certain = classifier.getCertainty() == 1
     button_path = classifier.addToBasketButtonClassifier.getNodePath()
     bag_path = classifier.bagLinkClassifier.getNodePath()

     # a single space stands in for a missing node path
     return translateReturnValues([
         "true" if is_certain else "false",
         features,
         " " if button_path is None else button_path,
         " " if bag_path is None else bag_path,
     ])
Beispiel #4
0
    def execute(self, params):
        '''
        Parameters:
        1 pagecontent
        2 objectofextraction ("ecom_product_name","ecom_product_price")
        3 extractiontuples

        Returns:
        successfulextractiontuple
        [extractedobjname
        extractedobjvalue]
        ...
        (nothing when there are no extraction tuples)
        '''
        page_dict = {"tree": xpathops.content2tree(params[0])}

        extraction_obj_name = params[1]
        candidates = params[2]
        # a lone tuple is wrapped so that it is handled as a one-element list
        if type(candidates) is tuple:
            candidates = [candidates]

        # nothing to try: report an empty result
        if len(candidates) == 0:
            return translateReturnValues([])

        values_extractor = ExtractValues(page_dict)
        verifier = extracthelpersfactory.verifierFactoryMethod(
            extraction_obj_name)
        miner = extracthelpersfactory.minerFactoryMethod(
            extraction_obj_name)

        described_values, winning_tuple = values_extractor.extract(
            candidates, verifier, miner)

        # the successful tuple comes first, then flattened name/value pairs
        returnlist = [winning_tuple]
        for described in described_values:
            returnlist.extend((described[0], described[1]))

        return translateReturnValues(returnlist)
 def execute(self,params):
     '''
     Parameters:
     1 pagecontent
     2 objectofextraction ("ecom_product_name","ecom_product_price")
     3 extractiontuples

     Returns:
     successfulextractiontuple
     [extractedobjname
     extractedobjvalue]
     ...
     (nothing when there are no extraction tuples)
     '''
     tree = xpathops.content2tree(params[0])
     page_dict = {"tree":tree}

     target_name = params[1]
     tuples_to_try = params[2]
     # a single tuple is treated as a list holding just that tuple
     if type(tuples_to_try) is tuple:
         tuples_to_try = [tuples_to_try]

     returnlist = []

     if len(tuples_to_try) > 0:
         extractor = ExtractValues(page_dict)
         verifier = extracthelpersfactory.verifierFactoryMethod(target_name)
         miner = extracthelpersfactory.minerFactoryMethod(target_name)

         described, successful = extractor.extract(tuples_to_try, verifier, miner)

         # successful tuple first, followed by name, value, name, value, ...
         returnlist = [successful] + [
             item
             for nameval in described
             for item in (nameval[0], nameval[1])
         ]

     return translateReturnValues(returnlist)
     
     
Beispiel #6
0
 def execute(self,params):
     '''
     Parameters:
     1 new page content
     2 new page language
     3 new page's site -> is button extracted? ("true" means yes)
     4 product cluster center
     5 category cluster center
     6 product cluster 50pc dist
     7 product cluster 80pc dist
     8 category cluster 50pc dist
     9 category cluster 80pc dist
     10 scaler mean
     11 scaler std

     Returns:
     page category
     string representation of the classifier's feature values
     '''
     page_dict = {"content":params[0],"lang":params[1]}
     button_known_for_site = params[2] == "true"
     product_center = params[3]
     category_center = params[4]
     product_dist_50pc = params[5]
     product_dist_80pc = params[6]
     category_dist_50pc = params[7]
     category_dist_80pc = params[8]
     scaler_mean = params[9]
     scaler_std = params[10]

     classifier = ProductOfferingPageClassifier()
     classifier.classify(page_dict, None)
     features = classifier.getFeaturesVals()

     # None  -> no button extracted for the site, nothing to confirm or refute
     # True  -> button extracted for the site and the last feature equals 1
     # False -> button extracted for the site but the last feature is not 1
     page_has_button = None
     if button_known_for_site:
         page_has_button = bool(features[-1] == 1)

     cluster_finder = findpagecluster.FindPageCluster(
         product_center, category_center,
         product_dist_50pc, product_dist_80pc,
         category_dist_50pc, category_dist_80pc,
         scaler_mean, scaler_std)
     cluster_finder.find(page_dict)

     # a page clustered as a product is kept when the button check confirms
     # it or could not be made; it is downgraded only on a definite "no"
     if page_dict.get("category") == "ecom_product" and page_has_button is False:
         page_dict["category"] = "ecom_other"

     return translateReturnValues([page_dict["category"], str(features)])
     
 #     returnlist.append("true" if certainty==1 else "false")
 #     returnlist.extend([str(f) for f in features])
     
     
     
     
     
     
 def execute(self,params):
     '''
     Parameters:
     1 first text fragment
     2 second text fragment

     Returns:
     both fragments joined by a space, followed by
     " of Java, it's Python!"
     '''
     logger = logging.getLogger("leads")
     # params is handed over as a logging argument so that the message is
     # only formatted when debug output is actually enabled
     logger.debug("params: %s", params)
     message = params[0]+" "+params[1]+" of Java, it's Python!"
     return translateReturnValues([message])