def _system_start(self):
    """
    Start the system.

    Reads the module groups "crawlers", "scrapers", "matchers" and
    "updaters" from the "MOD_AUTOMATION" configuration section, creates one
    ModuleProxy per requested module instance on "localhost" (using
    consecutive ports starting at the configured "base_port"), appends the
    proxies to self.modules and finally calls spawn() on every entry of
    self.modules.

    Each group is a comma separated list of module names. An entry written
    as "name(N)" requests N instances of that module; a plain "name"
    requests one.
    """
    #get the configuration section once; it holds both the base port and
    #the module lists
    config = Config.getconfig("MOD_AUTOMATION")

    #base port
    port = int(config.get("base_port"))

    #get configured modules
    for group in ["crawlers", "scrapers", "matchers", "updaters"]:
        for entry in config.get(group).split(","):
            name = entry.strip()

            #an empty group or a stray comma yields a blank entry; there is
            #no module to create for it, so do not burn a port on it
            if not name:
                continue

            #get count ("name(N)" means N instances)
            count = 1
            if name.endswith(")"):
                tokens = name.split("(")
                if len(tokens) > 1:
                    name = tokens[0]
                    count = int(tokens[1][:-1])

            #create proxies, one port per instance
            for _ in range(count):
                proxy = ModuleProxy(name.strip(), "localhost", port)
                self.modules.append(proxy)
                port = port + 1

    #stop all rogue modules (that might not have been shutdown)
    #NOTE(review): no stop logic is implemented here - only the placeholder
    #comment exists

    #start all modules
    for module in self.modules:
        module.spawn()
def __init__(self):
    """
    Initialize the API client with the end point stored under
    "cimri_service_url" in the "API" configuration section.
    """
    #look up the configured end point
    api_config = Config.getconfig("API")
    endpoint = api_config.get("cimri_service_url")

    #let the base class finish the set up with that end point
    super(MerchantsAPI, self).__init__(endpoint)
def __init__(self):
    """
    Set up the logger, the "WEB" configuration section and the url fault
    logging flag of this instance.
    """
    #logger named after the concrete class of this instance
    class_name = self.__class__.__name__
    self.logger = Logger(class_name)

    #"WEB" configuration section
    web_config = Config.getconfig("WEB")
    self.config = web_config

    #url fault logging starts switched off
    #NOTE(review): the previous comment here read "log url errors by
    #default", which does not match the False value - confirm the intent
    self._log_url_faults = False
def getthreadlogs(self):
    """
    Get the list of all threads recorded in the system.

    @rtype: list
    @return: thread list (whatever ends up in Printer.buffer after the
             listing has run)
    """
    #start from an empty output buffer
    Printer.buffer = []

    #list the tasks found in the configured task store; the second argument
    #is passed as True (presumably a "threads only" switch - verify against
    #Analyzer.list_tasks)
    lister = Analyzer(console=False)
    store_path = Config.getconfig("SYS").get("task_store_path")
    lister.list_tasks(store_path, True)

    return Printer.buffer
def _parse(self):
    """
    Parse the merchant XML held in self.xml.

    Sets self.encoding, rewrites self.xml without its leading header,
    stores the parser in self.parser and fills self.items with one
    MerchantItem per "MerchantItem" element (both namespace qualified and
    unqualified tags are collected).

    @rtype: bool
    @return: True if the XML was parsed without an exception, False
             otherwise (the exception is logged)
    """
    #get namespace
    namespace = Config.getconfig("API").get("cimri_merchant_xml_namespace")

    try:
        #get encoding (assume utf8 unless the first 1000 characters name
        #iso-8859-9)
        #(also note the use of \uFEFF - in merchant 2824 for example - http://blogs.msdn.com/b/michkap/archive/2005/01/20/357028.aspx)
        head = ignore_non_alphanumeric(self.xml[:1000].lower())
        self.encoding = "iso-8859-9" if "iso88599" in head else "utf-8"

        #strip xml header: drop everything up to the first ">" when an xml
        #declaration is present, otherwise drop whatever precedes the
        #first "<"
        if "<?xml" in self.xml:
            cut = self.xml.find(">")
            self.xml = self.xml[cut + 1:].strip()
        elif "<" in self.xml:
            cut = self.xml.find("<")
            self.xml = self.xml[cut:].strip()

        #parser (recovers from malformed input)
        self.parser = etree.XMLParser(recover=True)    #ns_clean=True

        #get root element
        root = etree.fromstring(self.xml, self.parser)

        #parse items: qualified tags first, then unqualified ones
        self.items = []
        for tag in ("{" + namespace + "}MerchantItem", "MerchantItem"):
            for node in root.iter(tag):
                item = MerchantItem(xml=node, encoding=self.encoding)
                self.items.append(item)

    except Exception as e:
        self.logger.error("exception parsing merchant xml: %s", e)
        return False

    return True
def gettaskerrors(self, id):
    """
    Get the error logs of a task recorded in the system.

    @type  id: str
    @param id: task ID

    @rtype: list
    @return: task error logs (whatever ends up in Printer.buffer after the
             report has run)
    """
    #start from an empty output buffer
    Printer.buffer = []

    #report only the "error" section of the task
    reporter = Analyzer(console=False)
    store_path = Config.getconfig("SYS").get("task_store_path")
    reporter.print_report(store_path, id, section="error")

    return Printer.buffer
def gettaskresults(self, id):
    """
    Get the results of a task recorded in the system.

    @type  id: str
    @param id: task ID

    @rtype: list
    @return: task results (whatever ends up in Printer.buffer after the
             report has run)
    """
    #start from an empty output buffer
    Printer.buffer = []

    #report only the "result" section of the task
    reporter = Analyzer(console=False)
    store_path = Config.getconfig("SYS").get("task_store_path")
    reporter.print_report(store_path, id, section="result")

    return Printer.buffer
def getthreadinfo(self, id):
    """
    Get the information of a processing thread recorded in the system.

    @type  id: str
    @param id: thread ID

    @rtype: list
    @return: thread information (whatever ends up in Printer.buffer after
             the report has run)
    """
    #start from an empty output buffer
    Printer.buffer = []

    #full report, no section filter
    reporter = Analyzer(console=False)
    store_path = Config.getconfig("SYS").get("task_store_path")
    reporter.print_report(store_path, id)

    return Printer.buffer
def _validate(self):
    """
    Validate the merchant XML against the configured XSD schema.

    NOTE: validation is currently switched off - the method returns True
    right away and everything below the early return is unreachable until
    that return is removed.

    @rtype: bool
    @return: True if the XML is valid (always True while validation is
             skipped), False otherwise
    """
    #TEMP - SKIP VALIDATION
    return True

    #get schema file
    xsd = Config.getconfig("API").get("cimri_merchant_xml_schema")

    #read schema file (the with block closes the handle on every path)
    try:
        with open(xsd, 'r') as f:
            schema_source = f.read()
    except Exception:
        self.logger.error("exception reading merchant xml schema file: %s", xsd)
        return False

    #validate xml
    try:
        #get schema
        schema = etree.XMLSchema(etree.XML(schema_source))

        #create parser (was "schema=scheme", an undefined name)
        self.parser = etree.XMLParser(schema=schema)

        #validate: parsing with a schema bound parser raises on invalid xml
        etree.fromstring(self.xml, self.parser)

    except Exception as e:
        self.logger.error("validation failed on merchant xml " + str(e))
        #report the failure as documented instead of falling through to True
        return False

    return True
def from_xml(self, xml):
    """
    Initialize MerchantItem fields from an xml element.

    Every node returned by xml.iter() is inspected. When its tag - either
    as is, or with the leading "{namespace}" prefix cut off - is a key of
    MerchantItem.fieldmap, the stripped text of the node is stored on this
    instance under the attribute name the map gives for that key.

    @type  xml: element object providing iter() (the code does not treat
                it as a plain string)
    @param xml: xml element that can be used to initialize the
                MerchantItem fields
    """
    #get namespace
    ns = Config.getconfig("API").get("cimri_merchant_xml_namespace")

    #a qualified tag looks like "{" + ns + "}" + name, hence the +2
    prefix_len = len(ns) + 2

    #parse from xml
    for element in xml.iter():
        tag = element.tag

        #iter() also yields comments, processing instructions and entities;
        #their tag is a factory function, not a string, and slicing it
        #would raise a TypeError
        if callable(tag):
            continue

        #nodes without text have nothing to store
        text = element.text
        if text is None:
            continue
        value = text.strip()

        for key in (tag, tag[prefix_len:]):
            if key in MerchantItem.fieldmap:
                try:
                    setattr(self, MerchantItem.fieldmap[key], value)
                except Exception:
                    #best effort, as before: a field that cannot be set
                    #must not abort the remaining fields
                    pass
def __init__(self, section=None):
    """
    Bind the cache to a section directory below the configured
    "cache_path" and create that directory when it is missing.

    @type  section: str
    @param section: cache section to be used for cache operations. if None
                    (or blank) the general "__global__" cache is used.
    """
    #make sure section is specified right: strip it and treat a blank
    #section like no section at all
    if section is not None:
        section = section.strip() or None

    #cache section
    self.section = section

    #cache path - built from the normalized section so that a blank or
    #padded section name cannot point at a different directory
    cache_root = Config.getconfig("SYS").get("cache_path")
    self.path = os.path.join(cache_root, "__global__" if self.section is None else self.section)

    #if cache path doesn't exist, create it
    try:
        if not os.path.exists(self.path):
            os.makedirs(self.path)
    except Exception:
        #best effort: a directory that cannot be created must not prevent
        #construction (the former "passs" typo raised a NameError here)
        pass
def getstatus(cls):
    """
    Give general information about the cache and its content.

    Every directory directly below the configured "cache_path" counts as a
    cache section; for each of them the number of non-directory entries it
    contains is reported.

    @rtype: list
    @return: one dict per cache section with the keys "id" (section
             directory name) and "count" (number of files in the section)
    """
    #get cache root
    root = Config.getconfig("SYS").get("cache_path")

    status = []
    for name in os.listdir(root):
        #only directories are cache sections
        section_path = os.path.join(root, name)
        if not os.path.isdir(section_path):
            continue

        #count the plain entries; counting while iterating avoids calling
        #len() on a filter() result, which only works where filter() returns
        #a list
        count = sum(
            1
            for entry in os.listdir(section_path)
            if not os.path.isdir(os.path.join(section_path, entry))
        )

        status.append({"id": name, "count": count})

    return status
def __init__(self):
    """
    Initialize the instance with the url stored under "cimri_solr_mi_url"
    in the "API" configuration section.
    """
    #look up the configured url
    api_config = Config.getconfig("API")
    solr_url = api_config.get("cimri_solr_mi_url")

    #let the base class finish the set up with that url
    super(MerchantDB, self).__init__(solr_url)
"""
help='print out a report for a task or task thread. a task id or thread id can be specified here') parser.add_argument('--last-task', action='store_true', dest='lasttask', help='print out a report for the last task that was created') parser.add_argument('--last-thread', action='store_true', dest='lastthread', help='print out a report for the last thread that was created') parser.add_argument('--summary', action='store_true', dest='summary', help='when printing out reports, display only summary information about tasks') parser.add_argument('--path', dest='path', default=Config.getconfig("SYS").get("task_store_path"), help='provide a path different than the default system task archive path') #compile arguments args=parser.parse_args() #dispatch analyzer=Analyzer(console=True) if args.list==True: analyzer.list_tasks(args.path,args.threadsonly) elif args.id!=None: analyzer.print_report(args.path, id=args.id, section=("info" if args.summary is True else None), file=args.file) elif args.lasttask==True: analyzer.print_report(args.path, lasttask=True, section=("info" if args.summary is True else None), file=args.file)
def __init__(self):
    """
    Initialize the instance with the url stored under "cimri_solr_url" in
    the "API" configuration section.
    """
    #look up the configured url
    api_config = Config.getconfig("API")
    solr_url = api_config.get("cimri_solr_url")

    #let the base class finish the set up with that url
    super(CatalogueDB, self).__init__(solr_url)