def filter_data_by_tjid(records, tjid):
    """
    Extract the records belonging to a given specimen.

    Parameters
    ----------
    records : list
        list of *Record objects
    tjid : str
        barcode or tjid

    Returns
    -------
    filtered_data : list
    """
    if len(records) == 0:
        return []
    elif isinstance(records[0], GeneTestRecord):
        return [record for record in records if record.barcode == tjid]
    elif isinstance(records[0], PhysicalExaminationRecord) or \
            isinstance(records[0], QuestionnaireRecord):
        return [record for record in records if record.tjid == tjid]
    else:
        logger.error('Objects in records are not supported!')
        exit(1)
def wrapped(*args, **kwargs):
    try:
        fn(*args, **kwargs)
    except Exception, e:
        logger.error('Service CRUD error: %s' % e)
        db.session.rollback()
        raise TypeError
def parse_negatives(self):
    if not self.nfilename:
        logger.error('No se ha especificado un archivo '\
                'de trazas negativas!')
        raise Exception('No se ha especificado un archivo '\
                'de trazas negativas!')
    if self.verbose:
        print 'Starting parse of negative traces\n'
        start_time = time.time()
    if self.nfilename.endswith('.xes'):
        parser = NegativeParser(self.nfilename,
                required_dimension=self.dim,
                event_dictionary=self.event_dictionary)
    elif self.nfilename.endswith('.txt'):
        raise Exception('Not implemented yet!')
    else:
        logger.error("Error in file %s extension. Only '.xes'"\
                " is allowed!", (self.nfilename or ''))
        raise Exception("Error in file %s extension. Only '.xes'"\
                " is allowed!" % (self.nfilename or ''))
    parser.parikhs_vector()
    self.npv_traces = parser.pv_traces
    self.npv_set = parser.pv_set
    self.npv_array = parser.pv_array
    if self.verbose:
        print 'Parse of negative traces done\n'
        elapsed_time = time.time() - start_time
        print '# RESULTADO obtenido en: ', elapsed_time
        print '#'*40+'\n'
def _fetch_data(self, outfile, attributes, filters='', header=None, debug=False):
    cnt_all = 0
    out_f, outfile = safewfile(outfile, prompt=(not self.no_confirm), default='O')
    if header:
        out_f.write('\t'.join(header) + '\n')
    logging.info('Dumping "%s"...' % os.path.split(outfile)[1])
    for species in self.species_li:
        dataset = self.get_dataset_name(species)
        taxid = species[2]
        if not dataset:
            continue
        xml = self._make_query_xml(dataset, attributes=attributes, filters=filters)
        if debug:
            logging.info(xml)
        try:
            con = self.query_mart(xml)
        except MartException:
            import traceback
            err_msg = traceback.format_exc()
            logging.error("%s %s" % (species[0], err_msg))
            continue
        cnt = 0
        for line in con.split('\n'):
            if line.strip() != '':
                out_f.write(str(taxid) + '\t' + line + '\n')
                cnt += 1
                cnt_all += 1
        logging.info("%s %s" % (species[0], cnt))
    out_f.close()
    logging.info("Total: %d" % cnt_all)
def get_phi_from_database(tjid):
    """
    Fetch the personal information of the given specimen from the database.

    Parameters
    ----------
    tjid : str
        specimen barcode

    Returns
    -------
    phi : PHI
        PHI object
    """
    conn = connect_database()
    with conn.cursor() as cur:
        sql = 'select barcode, name, gender, dob, phone_no, test_product ' \
              'from xy_specimen where barcode=\'%s\';' % tjid
        cur.execute(sql)
        res = cur.fetchone()
    conn.close()
    if res is None:
        logger.error('找不到样本%s的个人信息!' % tjid)
        exit(1)
    phi = PHI()
    phi.barcode = res[0]
    phi.name = ensure_unicode(res[1])
    phi.gender = ensure_unicode(res[2])
    phi.dob = res[3]
    phi.phone_no = res[4]
    phi.test_product = res[5]
    return phi
def generate_clinvar_lib(data_folder):
    sys.path.insert(0, data_folder)
    orig_path = os.getcwd()
    try:
        os.chdir(data_folder)
        logging.info("Generate XML parser")
        ret = os.system('''generateDS.py -f -o "clinvar_tmp.py" -s "clinvarsubs.py" clinvar_public.xsd''')
        if ret != 0:
            logging.error("Unable to generate parser, return code: %s" % ret)
            raise Exception("Unable to generate parser, return code: %s" % ret)
        try:
            py = open("clinvar_tmp.py").read()
            # convert py2 to py3 (though they claim it supports both versions)
            py = py.replace("from StringIO import StringIO", "from io import StringIO")
            fout = open("clinvar.py", "w")
            fout.write(py)
            fout.close()
            os.unlink("clinvar_tmp.py")
            # can we import it?
            import clinvar
            logging.info("Found generated clinvar module: %s" % clinvar)
        except Exception as e:
            logging.error("Cannot convert to py3: %s" % e)
            raise
    finally:
        os.chdir(orig_path)
def post_vote():
    user = current_user
    if not request.json or 'talkId' not in request.json:
        abort(400)
    talkId = request.json['talkId']
    try:
        vote = db.session.query(Vote).filter(Vote.talkId == talkId).filter(Vote.email == user.email).first()
    except:
        logger.error("Unexpected error loading the vote: %s", sys.exc_info()[0])
        raise
    try:
        if vote is None:
            vote = Vote()
            vote.talkId = talkId
            vote.email = user.email
        vote.fitsTechfest = request.json['fitsTechfest']
        vote.fitsTrack = request.json['fitsTrack']
        vote.expectedAttendance = request.json['expectedAttendance']
        db.session.add(vote)
        db.session.commit()
        logger.debug('User {} voted on talkId {} - {}/{}/{}.'.format(
            user.email, talkId, vote.fitsTechfest, vote.fitsTrack,
            vote.expectedAttendance))
    except:
        logger.error("Unexpected error saving the vote: %s", sys.exc_info()[0])
        raise
    return json.dumps(vote.serialize), 201
def compute_hiperspaces(self):
    # The heuristic used when searching for connections between
    # different clusters can make qhull fail,
    # so we redirect its output to stay silent in those cases
    if not len(self.points) > 0:
        logger.error('No points to compute hull!')
        raise Exception('No points to compute hull!')

    stderr_fd = sys.stderr.fileno()
    with open('/tmp/qhull-output.log', 'w') as f, stderr_redirected(f):
        points = list(self.points)
        logger.info('Searching for hull in dimension %s based on %s points',
                len(points[0]), len(points))
        output = qconvex('n', points)
        if len(output) == 1:
            logger.debug('Could not get Hull. Joggle input?')
        try:
            dim, facets_nbr, facets = self.__parse_hs_output(output)
        except IncorrectOutput:
            logger.warning('Could not get hull')
            raise CannotGetHull()
    logger.info('Found hull in dimension %s of %s facets',
            dim, len(facets))
    self.dim = dim
    self.facets = facets
    if self.verbose:
        print "Computed MCH with ", facets_nbr, " halfspaces"
        print 'These are them:\n'
        for facet in self.facets: print facet
    return self.dim
def get_participants_info(input_file, webinar_id, details_mark):
    """
    Gather information about the webinar participants
    :input: a csv file of attendees for a GotoWebinar to read from
            the webinar id number
    :return: a list of two lists containing the webinar participants details
             headers and a list of items representing corresponding header values
    """
    reading_details = 0
    values_list = []
    remove_row_marker = '*If an attendee left and rejoined the session, the In Session Duration column only includes their first visit.'
    try:
        with open(input_file, 'rb') as csv_file:
            rdr = reader(csv_file)
            for row in rdr:
                if not reading_details:
                    if details_mark in row:
                        headers = ['Webinar ID'] + rdr.next()
                        reading_details = 1
                    continue
                elif remove_row_marker not in row:
                    values_list.append([webinar_id] + row)
            return [headers, values_list]
    except IOError as e:
        logger.error("Cannot read file '{}'".format(input_file))
        logger.debug("Exception:\n{}".format(e))
def parse_traces(self):
    if self.verbose:
        print 'Starting parse\n'
        start_time = time.time()
    if self.filename.endswith('.xes'):
        parser = XesParser(self.filename)
    elif self.filename.endswith('.txt'):
        parser = AdHocParser(self.filename)
    else:
        logger.error("Error in file %s extension. Only '.xes' and '.txt'"\
                " are allowed!", (self.filename or ''))
        raise Exception("Error in file %s extension. Only '.xes' and '.txt'"\
                " are allowed!" % (self.filename or ''))
    parser.parikhs_vector()
    self.event_dictionary = parser.event_dictionary
    self.reversed_dictionary = rotate_dict(parser.event_dictionary)
    self.pv_traces = parser.pv_traces
    self.pv_set = parser.pv_set
    self.pv_array = parser.pv_array
    self.dim = parser.dim
    if self.verbose:
        print 'Parse done\n'
        elapsed_time = time.time() - start_time
        print '# RESULTADO obtenido en: ', elapsed_time
        print '#'*40+'\n'
def do_sampling(self, points, *args, **kwargs):
    facets = []
    for _ in xrange(self.samp_num):
        # When sampling, it can be the case that the calculated
        # sample is insufficient to calculate the MCH, so try a few
        # times before actually raising an error
        tries = 3 if self.samp_size else 1
        while tries:
            try:
                points = self.get_sample()
                qhull = func(self, points, *args, **kwargs)
                # Add the facets we have already computed
                qhull.union(facets)
                # The points left out of the sample restrict the facets
                for outsider in self.pv_set - points:
                    qhull.restrict_to(outsider)
                facets = qhull.facets
                tries = 0
            except CannotIntegerify, err:
                raise err
            except (CannotGetHull, WrongDimension), err:
                tries -= 1
                if tries == 0 and self.samp_size:
                    logger.error('Cannot get MCH. Maybe doing *TOO*'\
                            ' small sampling?')
                    raise err
def compute_hiperspaces(self):
    if not len(self.points) > 0:
        logger.error('No points to compute hull!')
        raise Exception('No points to compute hull!')

    # The heuristic used when searching for connections between
    # different clusters means qhull might fail,
    # so we redirect stderr to avoid such errors
    # being visible to the user
    stderr_fd = sys.stderr.fileno()
    with open('/tmp/qhull-output.log', 'w') as f, stderr_redirected(f):
        points = list(self.points)
        logger.info('Searching for hull in dimension %s based on %s points',
                len(points[0]), len(points))
        output = qconvex('n', points)
        if len(output) == 1:
            logger.debug('Could not get Hull. Joggle input?')
        try:
            dim, facets_nbr, facets = self.__parse_hs_output(output)
        except IncorrectOutput:
            logger.error('Could not get hull')
            raise CannotGetHull()
    logger.info('Found hull in dimension %s of %s facets',
            dim, facets_nbr)
    self.dim = dim
    self.facets = facets
    return self.dim
def get_webinar_info(input_file, details_mark):
    """
    Gather information about the webinar
    :input: a csv file of attendees for a GotoWebinar to read from
    :return: a list of two lists containing the webinar details headers and
             corresponding header values
    """
    try:
        with open(input_file, 'rb') as csv_file:
            rdr = reader(csv_file)
            # read Generated info and advance to next useful headers
            rdr.next()
            keys = rdr.next()
            vals = rdr.next()
            rdr.next()
            # read the rest of webinar info
            while details_mark not in keys:
                try:
                    headers += clear_empty_from_list(keys)
                    values += clear_empty_from_list(vals)
                except NameError:
                    headers = clear_empty_from_list(keys)
                    values = clear_empty_from_list(vals)
                keys = rdr.next()
                vals = rdr.next()
            return [headers, values]
    except IOError as e:
        logger.error("Cannot read file '{}'".format(input_file))
        logger.debug("Exception:\n{}".format(e))
def package_delete(self, data_package):
    """ Delete a package by package_title.
        Return True if the operation succeeds, False otherwise.
    """
    msg = ''
    package_title = data_package[u'title']
    package_identifier = data_package[u'identifier']
    resp = self.package_search(
        prop='identifier', value=package_identifier
    )
    if not resp:
        msg = 'Package delete: \'%s\' NOT FOUND.' % package_title
        logger.error(msg)
        return False, msg
    package_id = resp[0][u'id']
    try:
        resp = self.__conn.action.package_delete(id=package_id)
    except ckanapi.NotFound:
        msg = 'Package delete: \'%s\' NOT FOUND.' % package_title
        logger.info(msg)
        return False, msg
    except Exception, error:
        msg = 'Package delete: ERROR to execute the command for %s.: %s' % (
            package_title, error
        )
        logger.error(msg)
        return False, msg
    return True, msg
def bulk_update(self):
    """ Queries SDS for all datasets and injects messages in rabbitmq. """
    logger.info('START bulk update')
    result_json, msg = self.query_all_datasets()
    if msg:
        logger.error('BULK update: %s', msg)
    else:
        datasets_json = result_json['results']['bindings']
        logger.info('BULK update: %s datasets found', len(datasets_json))
        rabbit = RabbitMQConnector(**rabbit_config)
        rabbit.open_connection()
        rabbit.declare_queue(self.queue_name)
        counter = 1
        for item_json in datasets_json:
            dataset_identifier = item_json['id']['value']
            dataset_url = item_json['dataset']['value']
            action = 'update'
            body = '%(action)s|%(dataset_url)s|%(dataset_identifier)s' % {
                'action': action,
                'dataset_url': dataset_url,
                'dataset_identifier': dataset_identifier}
            logger.info('BULK update %s: sending \'%s\' in \'%s\'',
                        counter, body, self.queue_name)
            rabbit.send_message(self.queue_name, body)
            counter += 1
        rabbit.close_connection()
    logger.info('DONE bulk update')
def get_qhull(self, neg_points=[]):
    """ From a Petri net, get its representation as a Convex Hull """
    # Create an empty Convex Hull
    qhull = Qhull(neg_points=neg_points)
    # The default normal for each facet
    dim = len(self.transitions)
    tmpl_normal = [0]*dim
    # Each transition corresponds to one dimension
    #   transition.label -> dimension number
    transitions = self.event_dictionary
    # Each facet corresponds to one place
    #   place.id -> {normal->[arc.value], offset->marking}
    facets_dict = {}
    # Iterate over the arcs
    for arc in self.arcs:
        # There should not be any zero-valued arcs
        if not arc.value:
            logger.error('We found a zero arc: %s', arc)
            raise Exception('We found a zero arc: %s' % arc)
        # NOTE remember that our internal representation of a halfspace is
        # the reverse of the paper's (we use <= 0 instead of >= 0)
        if isinstance(arc.source, Transition):
            # If the arc leaves a transition, the coefficient is < 0
            coef = -1*arc.value
            transition = arc.source
            place = arc.destination
        else:
            # If the arc leaves a place, the coefficient is > 0
            coef = arc.value
            place = arc.source
            transition = arc.destination
        x = transitions.setdefault(transition.label, len(transitions))
        facet = facets_dict.setdefault(place.id,
                {'normal': list(tmpl_normal),
                 'in_transitions': [],
                 'out_transitions': [],
                 'offset': -1*place.marking,
                 'id': place.id})
        if coef < 0:
            facet['in_transitions'].append(transition.label)
        else:
            facet['out_transitions'].append(transition.label)
        if facet['normal'][x]:
            logger.debug('Coefficient already loaded. Dummy place')
            coef = 0
        facet['normal'][x] = coef
    facets = []
    for pl_id, facet in facets_dict.items():
        # Do not create the facet for dummy places
        if not any(facet['normal']):
            continue
        # Values are always integer
        hs = Halfspace(facet['normal'], facet['offset'],
                       integer_vals=False)
        logger.debug('Adding facet %s', hs)
        facets.append(hs)
    qhull.dim = dim
    qhull.facets = facets
    return qhull
def package_search(self, prop, value):
    """ Search for a package """
    resp = None
    try:
        resp = self.__conn.action.package_search(fq='%s:%s' % (prop, value))
    except Exception, error:
        logger.error('Error searching for package \'%s:%s\'.' % (prop, value))
    return resp
def try_read(fn):
    def wrapped(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except NoResultFound, e:
            logger.info(' No Query Result Found: %s' % str(kwargs))
        except MultipleResultsFound, e:
            logger.error('Multiple Query Results Found: %s' % str(kwargs))
    return wrapped
def main(*args, **kwargs):
    usage = """
    Usage: ./xes_comparator_script.py <.ini config filename>

    Config file options:
    %s\n
    NOTE: Do you have the needed environment variables?
        - XES : Path to .xes file with traces (for running PacH)
        - PNML : Path to .pnml file with Petri net model (for simplifying PROM models)
        - NXES : Path to .xes file with negative traces
        - PETRI : Path where simplified .pnml files should be moved to after script ends
        - STATS : Path where statistic files should be moved to after script ends
    IMPORTANT: NO PATH MUST END IN '/' (it is added automatically)
    """ % (config_options)
    if not check_argv(sys.argv, minimum=1, maximum=4):
        print usage
        ret = -1
    else:
        ret = 0
        try:
            config_file = sys.argv[1]
            if not config_file.endswith('.ini'):
                print config_file, ' does not end in .ini. It should...'
                raise Exception('Filename has wrong extension')
            if not isfile(config_file):
                raise Exception("No such file")
            if '--debug' in sys.argv:
                pdb.set_trace()
            for filename, arguments in parse_config(config_file):
                comparator = ComparatorXes(filename, **arguments)
                #comparator.comparator.check_hull(log_file=filename,
                #        event_dictionary=comparator.pach.event_dictionary)
                complexity = comparator.compare(log_file=filename,
                        event_dictionary=comparator.pach.event_dictionary)
                comparator.generate_pnml()
                comparator.generate_outputs()
                if '--verbose' in sys.argv:
                    print complexity
            pnml_folder, out_folder = parse_config_output(config_file)
            pwd = os.getcwd()
            for basename in os.listdir(pwd):
                if basename.endswith('.pnml'):
                    pnml_file = os.path.join(pwd, basename)
                    if os.path.isfile(pnml_file):
                        shutil.copy2(pnml_file, pnml_folder)
                        os.remove(pnml_file)
                elif basename.endswith('.out'):
                    out_file = os.path.join(pwd, basename)
                    if os.path.isfile(out_file):
                        shutil.copy2(out_file, out_folder)
                        os.remove(out_file)
        except Exception, err:
            ret = 1
            if hasattr(err, 'message'):
                print 'Error: ', err.message
            else:
                print 'Error: ', err
            logger.error('Error: %s' % err, exc_info=True)
            raise
    return ret
def process_csv_info():
    """
    Processes the read information:
    Separate headers from webinars and participants details.
    Detect differences in participants headers and cope with them
    (keep the longest header and add empty fields in the right position for
    participants info rows that are shorter than the longest header).
    Basic error checking and debug message logging.
    :return: 1 on error and 0 on success
    """
    global w_dict, w_header, w_values, w_info
    global p_header, p_values, p_headers_list
    # get headers and values for webinars
    w_header = w_info[0]
    w_values = []
    for key in w_dict:
        w_values += w_dict[key]
    # get headers and values for participants
    p_header, p_values, diffs = p_headers_list[0], [], []
    for h in p_headers_list[1:]:
        # try to find differences in participants headers
        if len(p_header) < len(h):
            diffs = [x for x in h if x not in p_header]
            p_header = h
            break
        elif len(h) < len(p_header):
            diffs = [x for x in p_header if x not in h]
            break
    if diffs:
        diffs_pos = [p_header.index(x) for x in diffs]
    for key in p_dict:
        for row in p_dict[key]:
            if len(row) < len(p_header):
                # handle differences in input files headers
                if not diffs:
                    logger.error("Header longer than row but no diffs detected.")
                    return 1
                for pos in diffs_pos:
                    insert_pos = int(pos)
                    row.insert(insert_pos, "")
            elif len(row) > len(p_header):
                logger.error("Participants row longer than header. Exiting...")
                logger.debug('''
                webinar id:{}
                final_participants_header:{}
                row:{}
                '''.format(key, p_header, row))
                return 1
            else:
                break
        p_values += p_dict[key]
    return 0
def run(self, tags, rules, data):
    """Run rules (run all when "rules" is empty, otherwise run only those
       listed there) and return dict with their answers"""
    results = []
    for mod in self.rules:
        # Skip rule if we are supposed to run only specific rules and this
        # one is not the chosen one
        if len(rules) > 0 and mod.__name__ not in rules:
            logger.debug("Skipping %s because only specific rules are supposed to run" % mod.__name__)
            continue
        # Skip this rule if there is no intersection of tags we should run
        # and tags this rule should be run for
        if len([val for val in tags if val in mod.tags]) == 0:
            logger.debug("Skipping %s because it is not tagged with provided tags" % mod.__name__)
            continue
        # Finally run the rule
        func = getattr(mod, 'main')
        func_text = getattr(mod, 'text')
        name = getattr(mod, 'name')
        result = None
        used = []
        text = ''
        # Reset list of data rule used
        data.reset_access_list()
        # Now run the rule
        try:
            result = func(data)
        except DataNotAvailable:
            logger.error("Data not available for %s" % mod.__name__)
            result = False
        except:
            logger.exception("Something failed badly when executing %s" % mod.__name__)
            result = False
        logger.info("Rule %s returned %s" % (mod.__name__, result))
        # Store list of data rule has used
        used = data.get_access_list()
        # Now if necessary, get description of what's wrong
        if result:
            try:
                text = func_text(result)
            except:
                logger.exception("Something failed badly when getting description for %s" % mod.__name__)
        # Determine what the result was
        if result:
            status = 'FAIL'
        elif result is False:
            status = 'SKIP'
        elif result is None:
            status = 'PASS'
        else:
            logger.error("Failed to understand the result of %s" % result)
            continue
        # Save what was returned
        results.append({'label': mod.__name__, 'status': status,
                        'result': result, 'name': name, 'text': text,
                        'used': used})
    return results
def all_in(self, points):
    # Sanity check. Are points inside the Hull?
    # It makes things slower, especially in big cases
    logger.info('Sanity check: Are all points still valid?')
    where = self.separate(points)
    if len(where.get('outside', [])) > 0:
        logger.error('Some points are outside the hull')
        raise LostPoints('Some points are outside the hull: %s',
                where['outside'])
    logger.info('Sanity check passed')
    return True
def all_in(self, points):
    # This is to check that no point is left outside.
    # It makes everything slower in big examples
    logger.info('Sanity check: Are all points still valid?')
    where = self.separate(points)
    if len(where.get('outside', [])) > 0:
        logger.error('Some points are outside the hull')
        raise LostPoints('Some points are outside the hull: %s',
                where['outside'])
    logger.info('Sanity check passed')
    return True
def startPythm():
    """Start the Pythm and renice if it was requested
    """
    config = PythmConfig()
    renice_level = config.get("pythm", "renice", default=-5, dtype=int)
    if renice_level != 0:
        logger.debug("Renicing pythm to %d" % renice_level)
        try:
            os.nice(renice_level)
        except OSError, e:
            logger.error("Failed to renice: %s" % e)
def resource_show(self, resource_name):
    """ Get the resource by name """
    resp = None
    try:
        resp = self.__conn.action.resource_show(id=resource_name)
    except ckanapi.NotFound:
        logger.error('Resource \'%s\' not found.' % resource_name)
    else:
        logger.info('Resource \'%s\' found.' % resource_name)
    return resp
def package_show(self, package_name):
    """ Get the package by name """
    resp = None
    try:
        resp = self.__conn.action.package_show(id=package_name)
    except ckanapi.NotFound:
        logger.error('Get package: \'%s\' not found.' % package_name)
    else:
        logger.info('Get package: \'%s\' found.' % package_name)
    return resp
def tile_for(self, thing):
    if type(thing) is Being:
        tiles = [t for t in self.values() if t.being is thing]
    # else it's equipment
    else:
        tiles = [i for i in [t.inventory for t in self.values()] if thing is i]
    if len(tiles) != 1:
        logger.error('tiles %s length != 1', tiles)
        raise KeyError(tiles)
    return tiles[0]
def lockfile(name, shared=False, retry=True):
    """
    Use the file fn as a lock file, return when the lock has
    been acquired. Returns a variable to pass to unlockfile().
    """
    config.logger.debug("take lockfile %s", name)
    dirname = os.path.dirname(name)
    mkdirhier(dirname)

    if not os.access(dirname, os.W_OK):
        logger.error("Unable to acquire lock '%s', directory is not writable",
                     name)
        sys.exit(1)

    operation = fcntl.LOCK_EX
    if shared:
        operation = fcntl.LOCK_SH
    if not retry:
        operation = operation | fcntl.LOCK_NB

    while True:
        # If we leave the lockfiles lying around there is no problem
        # but we should clean up after ourselves. This gives potential
        # for races though. To work around this, when we acquire the lock
        # we check the file we locked was still the lock file on disk
        # by comparing inode numbers. If they don't match or the lockfile
        # no longer exists, we start again.
        # This implementation is unfair since the last person to request the
        # lock is the most likely to win it.
        # pylint: disable=broad-except
        # we disable the broad-except because we want to actually catch all possible exceptions
        try:
            lock_file = open(name, 'a+')
            fileno = lock_file.fileno()
            fcntl.flock(fileno, operation)
            statinfo = os.fstat(fileno)
            if os.path.exists(lock_file.name):
                statinfo2 = os.stat(lock_file.name)
                if statinfo.st_ino == statinfo2.st_ino:
                    return lock_file
            lock_file.close()
        except Exception as exc:
            try:
                lock_file.close()
            except Exception as exc2:
                config.logger.error("Failed to close the lockfile: %s", exc2)
            config.logger.error("Failed to acquire the lockfile: %s", exc)
        if not retry:
            return None
def set_dataset_data(self, action, dataset_url, dataset_data_rdf, dataset_json):
    """ Use data from SDS in JSON format and update the ODP [#68136]
    """
    logger.info('START setting \'%s\' dataset data - \'%s\'',
                action, dataset_url)
    resp, msg = self.odp.call_action(action, dataset_json, dataset_data_rdf)
    if not msg:
        logger.info('DONE setting \'%s\' dataset data - \'%s\'',
                    action, dataset_url)
        return msg
    else:
        logger.error('FAIL setting \'%s\' dataset data - \'%s\': %s',
                     action, dataset_url, msg)
        return msg
def tag_search(self, tag_name):
    """ Get the tag by name. It returns a dictionary like:
        {u'count': 1, u'results': [{u'vocabulary_id': None,
         u'id': u'tag_id', u'name': u'tag_name'}]}
    """
    resp = None
    try:
        resp = self.__conn.action.tag_search(query=tag_name)
    except ckanapi.NotFound:
        logger.error('Search tag: \'%s\' not found.' % tag_name)
    else:
        if resp[u'count'] == 0:
            logger.error('Search tag: \'%s\' not found.' % tag_name)
        else:
            logger.info('Search tag: \'%s\' found.' % tag_name)
    return resp
)

if MODE.lower() == 'phase':
    # This mode is intended to be used for correcting the phase error in your CT sensors.
    # Please ensure that you have a purely resistive load running through your CT sensors -
    # that means no electric fans and no digital circuitry!

    PF_ROUNDING_DIGITS = 3  # This variable controls how many decimal places the PF will be rounded to

    while True:
        try:
            ct_num = int(
                input(
                    "\nWhich CT number are you calibrating? Enter the number of the CT label [0 - 5]: "
                ))
            if ct_num not in range(0, 6):
                logger.error(
                    "Please choose from CT numbers 0, 1, 2, 3, 4, or 5."
                )
            else:
                ct_selection = f'ct{ct_num}'
                break
        except ValueError:
            logger.error(
                "Please enter an integer! Acceptable choices are: 0, 1, 2, 3, 4, 5."
            )

    cont = input(
        dedent(f"""
        #------------------------------------------------------------------------------#
        # IMPORTANT: Make sure that current transformer {ct_selection} is installed over #
        #            a purely resistive load and that the load is turned on             #
        #            before continuing with the calibration!                            #
def main():
    # Check for command line arguments
    # d: install_dir: {DOT_INSTALL_DIR}
    # v: verbose
    # h: help
    install_dir = None
    modules = None
    try:
        options, _ = getopt.getopt(
            sys.argv[1:], "d:vhm:",
            ["installdir=", "help", "verbose", "modules="])
    except getopt.GetoptError as err:
        print(err)
        print("Please see the help (--help).")
        exit(1)

    # Parse the command line arguments.
    for option, argument in options:
        if option in ("-v", "--verbose"):
            logger.setLevel(logging.DEBUG)
            for handler in logger.handlers:
                handler.setLevel(logging.DEBUG)
        elif option in ("-h", "--help"):
            print_help()
            exit(0)
        elif option in ("-d", "--installdir"):
            install_dir = os.path.abspath(argument)
        elif option in ("-m", "--modules"):
            if argument.lower() == "all":
                modules = list(helpers.scan_for_installers(install_dir).keys())
            else:
                modules = argument.split(",")
        else:
            assert False, "Unknown option {}.".format(option)

    if not install_dir:
        logger.fatal("Installation directory not provided. Not installing.")
        exit(1)
    if not modules or len(modules) < 1:
        logger.fatal("No modules selected.")
        exit(1)

    # Get all available modules for installation.
    available_modules = helpers.scan_for_installers(install_dir)

    # Remove the dependency modules from the list so they don't get installed twice.
    dependency_modules = []
    for module in modules:
        if "depends" in available_modules[module].keys():
            for dependency in available_modules[module]["depends"]:
                dependency_modules.append(dependency)
    for module in dependency_modules:
        if module in modules:
            modules.remove(module)

    logger.debug("Installation directory: {}".format(install_dir))
    for module in modules:
        try:
            install_module(module, install_dir, available_modules)
        except Exception as e:
            logger.error("Failed to install {}\n {}".format(module, e))
    print("\nAll done installing!")
def install_module(module_name, dot_install_dir, available_modules,
                   is_dependency=False, install_dependencies=True):
    # Cannot install a module if it doesn't have an installer.
    if module_name not in available_modules.keys() and \
            module_name.split(":")[0] not in ["package", "packages"]:
        logger.error("{} is not installable.".format(module_name))
        return False

    if module_name.split(":")[0] not in ["package", "packages"]:
        module = available_modules[module_name]

    # Check if the module needs an alternate installer function.
    name_split = module_name.split(":")
    if len(name_split) > 1:
        if name_split[0] not in installer_map.keys():
            logger.critical("Installer for {} not found.".format(module_name))
            return False
        installer = installer_map[name_split[0]]
        return installer(module_name, dot_install_dir, available_modules,
                         is_dependency)

    dependency_str = " dependency" if is_dependency else ""
    logger.info("Installing{}: {}".format(dependency_str, module_name))

    # Install the module's dependencies first (if any).
    if install_dependencies:
        if "depends" in module.keys():
            logger.debug("Found dependencies for {}.".format(module_name))
            if len(module["depends"]) > 0:
                for dependency in module["depends"]:
                    if not install_module(dependency, dot_install_dir,
                                          available_modules,
                                          is_dependency=True):
                        logger.critical(
                            "{} could not install dependency {}.".format(
                                module_name, dependency))
                        return False

    # Check if the entire directory can be installed.
    if "install_dir" in module.keys():
        install_dir = module["install_dir"]
        logger.debug("[{}] Installing entire directory to {}.".format(
            module_name, install_dir))
        source_dir = helpers.get_config(module["config_dir"])
        helpers.symlink(source_dir, install_dir, is_directory=True)
    elif "config_files" in module.keys():
        for config_file in module["config_files"]:
            install_location = module["config_files"][config_file]
            logger.debug("[{}] Installing {} to {}.".format(
                module_name, config_file, install_location))
            source_file = helpers.get_config(module["config_dir"], config_file)
            helpers.symlink(source_file, install_location)
    else:
        logger.debug("[{}]: No config files to install.".format(module_name))

    # Module has been successfully installed.
    return True
def send_task_2_worker(task_id):
    """
    Scheduled-task handler: split the task into jobs, one per account,
    and send each job to the most suitable queue.
    :param task_id: task id
    :return: True on success, False on failure
    """
    try:
        jobs = []
        time_it_beg = datetime.datetime.now()
        db_scoped_session = ScopedSession()
        task = db_scoped_session.query(
            Task.category, Task.configure, Task.limit_counts,
            Task.succeed_counts, Task.scheduler).filter(Task.id == task_id).first()
        if not task:
            logger.error(
                'send_task_2_worker can not find the task, id={}. '.format(task_id))
            return False
        category, task_configure, limit_counts, succeed_counts, sch_id = task

        sch_mode = db_scoped_session.query(
            Scheduler.mode).filter(Scheduler.id == sch_id).first()

        # For periodic tasks the number of jobs produced each round is strictly
        # controlled; one-off tasks use every account the user specified.
        if sch_mode[0] in [1, 2]:
            if limit_counts:
                # If the task's succeeded count already exceeds the requested count,
                # or succeeded plus running jobs exceed 120% of the requested count,
                # there is no need to produce more jobs.
                if succeed_counts >= int(limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task already finished, task id={}, succeed jobs({}) >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, limit_counts))
                    return True
                task_running_jobs = db_scoped_session.query(Job).filter(
                    and_(Job.task == task_id, Job.status == 'running')).count()
                if task_running_jobs + succeed_counts >= int(limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task will finish, task id={}, succeed jobs({})+running jobs({}) >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, task_running_jobs, limit_counts))
                    return True
                # When too many of a task's jobs are still running,
                # temporarily stop producing new jobs.
                if task_running_jobs >= 10000:
                    logger.warning(
                        'task({}) jobs num={} has reached jobs limit 10000'.format(
                            task_id, task_running_jobs))
                    return True

        # Find the task's processor function according to its category.
        tcg = db_scoped_session.query(TaskCategory.processor).filter(
            TaskCategory.category == category).first()
        if not tcg:
            return False
        # Each task category corresponds to one processor.
        task_processor = tcg[0]
        if not task_processor:
            logger.error(
                'Task(id={}) have no processor, ignore processing.'.format(task_id))
            return False

        logger.info(
            '---------send_task_2_worker task id={}. --------'.format(task_id))

        # Find all accounts attached to the task.
        res = db_scoped_session.query(TaskAccountGroup.account_id).filter(
            TaskAccountGroup.task_id == task_id).all()
        account_ids = [x[0] for x in res]
        accounts = db_scoped_session.query(
            Account.id, Account.status, Account.account, Account.password,
            Account.email, Account.email_pwd, Account.gender,
            Account.phone_number, Account.birthday, Account.national_id,
            Account.name, Account.active_area, Account.active_browser,
            Account.profile_path, Account.configure).filter(
                Account.id.in_(account_ids)).all()
        # agents = db_scoped_session.query(Agent.id, Agent.active_area).filter(
        #     Agent.status != -1).order_by(Agent.status).all()

        # A task has multiple accounts; split the task into one job per account.
        real_accounts_num = 0
        for acc in accounts:
            acc_id, status, account, password, email, email_pwd, gender, phone_number, birthday, national_id, name, \
                active_area, active_browser_id, profile_path, account_configure = acc
            if status == 'invalid':
                logger.warning(
                    'account status is invalid. task id={}, account id={}'.format(
                        task_id, acc_id))
                continue

            area = db_scoped_session.query(Area).filter(
                Area.id == active_area).first()
            queue_name = 'default'
            area_id = None
            if area:
                area_id, queue_name = area.id, area.name
            else:
                logger.warning(
                    'There have no optimal agent for task, task id={}, account id={}, account area={}'
                    .format(task_id, acc_id, active_area))

            active_browser = db_scoped_session.query(FingerPrint.value).filter(
                FingerPrint.id == active_browser_id).first()

            if get_system_args()["force_display"] == 0:
                headless = True if get_environment() == 'pro' else False
            else:
                headless = False

            # Build the parameters required to execute the job.
            inputs = {
                'system': {
                    'headless': headless
                },
                'task': {
                    'task_id': task_id,
                    'configure': json.loads(task_configure) if task_configure else {},
                },
                'account': {
                    'account': account,
                    'password': password,
                    'status': status,
                    'email': email,
                    'email_pwd': email_pwd,
                    'gender': gender,
                    'phone_number': phone_number,
                    'birthday': birthday,
                    'national_id': national_id,
                    'name': name,
                    'active_area': active_area,
                    'active_browser': json.loads(active_browser[0]) if active_browser else {},
                    'profile_path': profile_path,
                    'configure': json.loads(account_configure) if account_configure else {}
                }
            }

            celery_task_name = "tasks.tasks.{}".format(task_processor)
            real_accounts_num += 1
            track = app.send_task(celery_task_name, args=(inputs, ),
                                  queue=queue_name, routing_key=queue_name)
            logger.info(
                '-----send sub task to worker, celery task name={}, area id={}, queue={}, '
                'task id={}, account id={}, track id={}'.format(
                    celery_task_name, area_id, queue_name, task_id, acc_id, track.id))
            job = Job()
            job.task = task_id
            job.account = acc_id
            job.area = area_id
            job.status = 'running'
            job.track_id = track.id
            job.start_time = datetime.datetime.now()
            jobs.append(job)

            if sch_mode[0] in [1, 2]:
                # If the jobs already running plus the jobs produced in this round
                # exceed the user-requested count, break and stop producing jobs;
                # re-check in the next scheduling cycle.
                total_running_jobs = task_running_jobs + real_accounts_num
                if (limit_counts and total_running_jobs >= int(
                        limit_counts * 1.2)) or total_running_jobs >= 10000:
                    logger.warning(
                        'task({}) total running jobs num({}) is already more than limit counts({})*1.2'
                        .format(task_id, total_running_jobs, limit_counts))
                    break

        # Update the task status to running.
        # The number of accounts actually usable by the task may vary between
        # scheduling rounds as account statuses change.
        db_scoped_session.query(Task).filter(and_(Task.id == task_id, Task.status.in_(['new', 'pending'])))\
            .update({Task.status: "running",
                     Task.start_time: datetime.datetime.now(),
                     Task.real_accounts_num: real_accounts_num,
                     Task.last_update: datetime.datetime.now()},
                    synchronize_session=False)
        if jobs:
            db_scoped_session.add_all(jobs)
        db_scoped_session.commit()
        logger.info(
            '----send_task_2_worker send task {}, produce jobs={}, used {} seconds. '
            .format(task_id, real_accounts_num,
                    (datetime.datetime.now() - time_it_beg).seconds))
    except BaseException as e:
        logger.exception(
            'send_task_2_worker exception task id={}, e={}'.format(task_id, e))
        db_scoped_session.rollback()
    finally:
        ScopedSession.remove()
    return True
def monitor(id, type):
    with app.app_context():
        status = '成功执行但未监测到变化'
        global_content = None
        try:
            if type == 'html':
                task = Task.query.filter_by(id=id).first()
                url = task.url
                selector_type = task.selector_type
                selector = task.selector
                is_chrome = task.is_chrome
                regular_expression = task.regular_expression
                mail = task.mail
                wechat = task.wechat
                pushover = task.pushover
                name = task.name
                rule = task.rule
                headers = task.headers

                last = Content.query.filter_by(task_id=id, task_type=type).first()
                if not last:
                    last = Content(id)
                last_content = last.content

                content = get_content(url, is_chrome, selector_type, selector,
                                      regular_expression, headers)
                global_content = content
                status_code = is_changed(rule, content, last_content)
                logger.info(
                    'rule: {}, content: {}, last_content: {}, status_code: {}'.format(
                        rule, content, last_content, status_code))
                if status_code == 1:
                    status = '监测到变化,但未命中规则,最新值为{}'.format(content)
                    last.content = content
                    db.session.add(last)
                    db.session.commit()
                elif status_code == 2:
                    status = '监测到变化,且命中规则,最新值为{}'.format(content)
                    msg = wraper_msg(content, url)
                    send_message(msg, name, mail, wechat, pushover)
                    last.content = content
                    db.session.add(last)
                    db.session.commit()
                elif status_code == 3:
                    status = '监测到变化,最新值为{}'.format(content)
                    msg = wraper_msg(content, url)
                    send_message(msg, name, mail, wechat, pushover)
                    last.content = content
                    db.session.add(last)
                    db.session.commit()
            elif type == 'rss':
                rss_task = RSSTask.query.filter_by(id=id).first()
                url = rss_task.url
                name = rss_task.name
                mail = rss_task.mail
                wechat = rss_task.wechat
                pushover = rss_task.pushover

                last = Content.query.filter_by(task_id=id, task_type=type).first()
                if not last:
                    last = Content(id, 'rss')
                last_guid = last.content

                item = get_rss_content(url)
                if item['guid'] != last_guid:
                    content = wraper_rss_msg(item)
                    global_content = content
                    send_message(content, name, mail, wechat, pushover)
                    last.content = item['guid']
                    db.session.add(last)
                    db.session.commit()
                    status = '监测到变化,最新值:' + item['title']
        except FunctionTimedOut:
            logger.error(traceback.format_exc())
            status = '解析RSS超时'
        except PartNotificationError as e:
            logger.error(traceback.format_exc())
            status = repr(e)
            last.content = global_content
            db.session.add(last)
            db.session.commit()
        except Exception as e:
            logger.error(traceback.format_exc())
            status = repr(e)

        task_status = TaskStatus.query.filter_by(task_id=id, task_type=type).first()
        task_status.last_run = datetime.now()
        task_status.last_status = status
        db.session.add(task_status)
        db.session.commit()