Example #1
def filter_data_by_tjid(records, tjid):
    """
    Extract the records of a given sample

    Parameters
    ------------
    records : list
        list of *Record objects
    tjid : str
        barcode or tjid

    Returns
    --------
    filtered_data : list
    """
    if len(records) == 0:
        return []
    elif isinstance(records[0], GeneTestRecord):
        return [record for record in records if record.barcode == tjid]
    elif isinstance(records[0], PhysicalExaminationRecord) or \
            isinstance(records[0], QuestionnaireRecord):
        return [record for record in records if record.tjid == tjid]
    else:
        logger.error('Objects in records are not supported!')
        exit(1)
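A minimal usage sketch (the loader and the barcode value below are hypothetical, for illustration only):

records = load_gene_test_records()              # hypothetical loader from the project
sample = filter_data_by_tjid(records, 'TJ001')  # 'TJ001' is a made-up barcode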
Example #2
 def wrapped(*args, **kwargs):
     try:
         fn(*args, **kwargs)
     except Exception as e:
         logger.error('Service CRUD error: %s' % e)
         db.session.rollback()
         raise TypeError
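This snippet is only the inner function of a decorator; the enclosing definition is not shown. A minimal sketch of the full pattern, using a hypothetical outer name try_commit, might look like:

import functools

def try_commit(fn):  # hypothetical name; the source omits the outer function
    @functools.wraps(fn)
    def wrapped(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except Exception as e:
            logger.error('Service CRUD error: %s' % e)
            db.session.rollback()
            raise TypeError
    return wrapped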
Example #3
 def parse_negatives(self):
     if not self.nfilename:
         logger.error('No negative traces file specified!')
         raise Exception('No negative traces file specified!')
     if self.verbose:
         print('Starting parse of negative traces\n')
         start_time = time.time()
     if self.nfilename.endswith('.xes'):
         parser = NegativeParser(self.nfilename,
                 required_dimension=self.dim,
                 event_dictionary=self.event_dictionary)
     elif self.nfilename.endswith('.txt'):
         raise Exception('Not implemented yet!')
     else:
         logger.error("Error in file %s extension. Only '.xes'"
                 " is allowed!", (self.nfilename or ''))
         raise Exception("Error in file %s extension. Only '.xes'"
                 " is allowed!" % (self.nfilename or ''))
     parser.parikhs_vector()
     self.npv_traces = parser.pv_traces
     self.npv_set = parser.pv_set
     self.npv_array = parser.pv_array
     if self.verbose:
         print('Parse of negative traces done\n')
         elapsed_time = time.time() - start_time
         print('# RESULT obtained in: ', elapsed_time)
         print('#' * 40 + '\n')
Example #4
 def _fetch_data(self, outfile, attributes, filters='', header=None, debug=False):
     cnt_all = 0
     out_f, outfile = safewfile(outfile, prompt=(not self.no_confirm), default='O')
     if header:
         out_f.write('\t'.join(header) + '\n')
     logging.info('Dumping "%s"...' % os.path.split(outfile)[1])
     for species in self.species_li:
         dataset = self.get_dataset_name(species)
         taxid = species[2]
         if not dataset:
             continue
         xml = self._make_query_xml(dataset, attributes=attributes, filters=filters)
         if debug:
             logging.info(xml)
         try:
             con = self.query_mart(xml)
         except MartException:
             import traceback
             err_msg = traceback.format_exc()
             logging.error("%s %s" % (species[0], err_msg))
             continue
         cnt = 0
         for line in con.split('\n'):
             if line.strip() != '':
                 out_f.write(str(taxid) + '\t' + line + '\n')
                 cnt += 1
                 cnt_all += 1
         logging.info("%s %s" % (species[0], cnt))
     out_f.close()
     logging.info("Total: %d" % cnt_all)
Example #5
def get_phi_from_database(tjid):
    """
    Fetch the personal information (PHI) of a given sample from the database

    Parameters
    ----------
    tjid : str
        specimen barcode

    Returns
    --------
    phi : PHI
        PHI object
    """
    conn = connect_database()
    with conn.cursor() as cur:
        # Parameterized query instead of string interpolation, to avoid SQL
        # injection (assumes a DB-API driver with the '%s' paramstyle,
        # e.g. pymysql)
        sql = 'select barcode, name, gender, dob, phone_no, test_product ' \
              'from xy_specimen where barcode=%s'
        cur.execute(sql, (tjid,))
        res = cur.fetchone()
    conn.close()
    if res is None:
        logger.error('No personal information found for sample %s!' % tjid)
        exit(1)
    phi = PHI()
    phi.barcode = res[0]
    phi.name = ensure_unicode(res[1])
    phi.gender = ensure_unicode(res[2])
    phi.dob = res[3]
    phi.phone_no = res[4]
    phi.test_product = res[5]
    return phi
Example #6
def generate_clinvar_lib(data_folder):
    sys.path.insert(0,data_folder)
    orig_path = os.getcwd()
    try:
        os.chdir(data_folder)
        logging.info("Generate XM parser")
        ret = os.system('''generateDS.py -f -o "clinvar_tmp.py" -s "clinvarsubs.py" clinvar_public.xsd''')
        if ret != 0:
            logging.error("Unable to generate parser, return code: %s" % ret)
            raise
        try:
            py = open("clinvar_tmp.py").read()
            # convert py2 to py3 (though they claim it supports both versions)
            py = py.replace("from StringIO import StringIO","from io import StringIO")
            fout = open("clinvar.py","w")
            fout.write(py)
            fout.close()
            os.unlink("clinvar_tmp.py")
            # can we import it ?
            import clinvar
            logging.info("Found generated clinvar module: %s" % clinvar)
        except Exception as e:
            logging.error("Cannot convert to py3: %s" % e)
            raise
    finally:
        os.chdir(orig_path)
Example #7
def post_vote():
  user = current_user
  if not request.json or 'talkId' not in request.json:
    abort(400)

  talkId = request.json['talkId']

  try:
    vote = db.session.query(Vote).filter(Vote.talkId==talkId).filter(Vote.email==user.email).first()
  except:
    logger.error("Unexpected error loading the vote:", sys.exc_info()[0])
    raise

  try:
    if vote is None:
      vote = Vote()
      vote.talkId = talkId
      vote.email = user.email
    vote.fitsTechfest = request.json['fitsTechfest']
    vote.fitsTrack = request.json['fitsTrack']
    vote.expectedAttendance = request.json['expectedAttendance']
    db.session.add(vote)
    db.session.commit()
    logger.debug('User {} voted on talkId {} - {}/{}/{}.'.format(user.email,
      talkId, vote.fitsTechfest, vote.fitsTrack, vote.expectedAttendance))
  except:
    logger.error("Unexpected error saving the vote:", sys.exc_info()[0])
    raise

  return json.dumps(vote.serialize), 201
Example #8
 def compute_hiperspaces(self):
     # The heuristic used when searching for connections between
     # different clusters can fail, so we redirect the output to stay
     # silent in those cases
     if not len(self.points) > 0:
         logger.error('No points to compute hull!')
         raise Exception('No points to compute hull!')
     stderr_fd = sys.stderr.fileno()
     with open('/tmp/qhull-output.log', 'w') as f, stderr_redirected(f):
         points = list(self.points)
         logger.info('Searching for hull in dimension %s based on %s points',
                 len(points[0]),len(points))
         output = qconvex('n',points)
         if len(output) == 1:
             logger.debug('Could not get Hull. Joggle input?')
     try:
         dim, facets_nbr, facets = self.__parse_hs_output(output)
     except IncorrectOutput:
         logger.warning('Could not get hull')
         raise CannotGetHull()
     logger.info('Found hull in dimension %s of %s facets',
             dim,len(facets))
     self.dim = dim
     self.facets = facets
     if self.verbose:
         print('Computed MCH with', facets_nbr, 'halfspaces')
         print('These are the facets:\n')
         for facet in self.facets:
             print(facet)
     return self.dim
Example #9
def get_participants_info(input_file, webinar_id, details_mark):
    """
    Gather information about the webinar participants

    :input: a csv file of attendees for a GotoWebinar to read from,
            and the webinar id number
    :return: a list of two lists: the webinar participants' detail
             headers, and a list of rows holding the corresponding
             header values
    """
    
    reading_details = 0
    values_list = []
    remove_row_marker = '*If an attendee left and rejoined the session, the In Session Duration column only includes their first visit.'
    try:
        with open(input_file, 'r', newline='') as csv_file:
            rdr = reader(csv_file)
            for row in rdr:
                if not reading_details:
                    if details_mark in row:
                        headers = ['Webinar ID'] + next(rdr)
                        reading_details = 1
                        continue
                elif remove_row_marker not in row:
                    values_list.append([webinar_id] + row)
        return [headers, values_list]
    except IOError as e:
        logger.error("Cannot read file '{}'".format(input_file))
        logger.debug("Exception:\n{}".format(e))
Example #10
 def parse_traces(self):
     if self.verbose:
         print('Starting parse\n')
         start_time = time.time()
     if self.filename.endswith('.xes'):
         parser = XesParser(self.filename)
     elif self.filename.endswith('.txt'):
         parser = AdHocParser(self.filename)
     else:
         logger.error("Error in file %s extension. Only '.xes' and '.txt'"
                 " are allowed!", (self.filename or ''))
         raise Exception("Error in file %s extension. Only '.xes' and '.txt'"
                 " are allowed!" % (self.filename or ''))
     parser.parikhs_vector()
     self.event_dictionary = parser.event_dictionary
     self.reversed_dictionary = rotate_dict(parser.event_dictionary)
     self.pv_traces = parser.pv_traces
     self.pv_set = parser.pv_set
     self.pv_array = parser.pv_array
     self.dim = parser.dim
     if self.verbose:
         print('Parse done\n')
         elapsed_time = time.time() - start_time
         print('# RESULT obtained in: ', elapsed_time)
         print('#' * 40 + '\n')
Example #11
 def do_sampling(self, points, *args, **kwargs):
     # `func` below is the function wrapped by this decorator; the
     # enclosing decorator definition is not shown in this snippet
     facets = []
     for _ in range(self.samp_num):
         # When sampling, it can be the case that the calculated
         # sample is insufficient to calculate the MCH, so try a few
         # times before actually raising an error
         tries = 3 if self.samp_size else 1
         while tries:
             try:
                 points = self.get_sample()
                 qhull = func(self, points, *args, **kwargs)
                 # Add the facets we have already computed
                 qhull.union(facets)
                 # The points not considered restrict the facets
                 for outsider in self.pv_set - points:
                     qhull.restrict_to(outsider)
                 facets = qhull.facets
                 tries = 0
             except CannotIntegerify as err:
                 raise err
             except (CannotGetHull, WrongDimension) as err:
                 tries -= 1
                 if tries == 0:
                     if self.samp_size:
                         logger.error('Cannot get MCH. Maybe doing *TOO*'
                                 ' small sampling?')
                     # re-raise only once the retries are exhausted
                     raise err
Example #12
    def compute_hiperspaces(self):
        if not len(self.points) > 0:
            logger.error('No points to compute hull!')
            raise Exception('No points to compute hull!')

        # The heuristic used when searching for connections between
        # different clusters means it might fail, so we redirect stderr
        # so that such errors are not visible to the user
        stderr_fd = sys.stderr.fileno()
        with open('/tmp/qhull-output.log', 'w') as f, stderr_redirected(f):
            points = list(self.points)
            logger.info('Searching for hull in dimension %s based on %s points',
                    len(points[0]),len(points))
            output = qconvex('n',points)
            if len(output) == 1:
                logger.debug('Could not get Hull. Joggle input?')
        try:
            dim, facets_nbr, facets = self.__parse_hs_output(output)
        except IncorrectOutput:
            logger.error('Could not get hull')
            raise CannotGetHull()
        logger.info('Found hull in dimension %s of %s facets',
                dim,facets_nbr)
        self.dim = dim
        self.facets = facets
        return self.dim
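stderr_redirected is a project helper not shown in these snippets; a minimal sketch of one plausible fd-level implementation (an assumption, not the project's actual code):

import contextlib
import os
import sys

@contextlib.contextmanager
def stderr_redirected(to_file):
    stderr_fd = sys.stderr.fileno()
    saved_fd = os.dup(stderr_fd)              # keep a copy of the original stderr
    try:
        os.dup2(to_file.fileno(), stderr_fd)  # point fd 2 at the log file
        yield
    finally:
        os.dup2(saved_fd, stderr_fd)          # restore the original stderr
        os.close(saved_fd)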
Example #13
def get_webinar_info(input_file, details_mark):
    """
    Gather information about the webinar

    :input: a csv file of attendees for a GotoWebinar to read from
    :return: a list of two lists containing the webinar details
             headers and corresponding header values
    """
    try:
        with open(input_file, 'r', newline='') as csv_file:
            rdr = reader(csv_file)
            # read Generated info and advance to next useful headers
            next(rdr)
            keys = next(rdr)
            vals = next(rdr)
            next(rdr)
            # read the rest of webinar info
            while details_mark not in keys:
                try:
                    headers += clear_empty_from_list(keys)
                    values += clear_empty_from_list(vals)
                except NameError:
                    headers = clear_empty_from_list(keys)
                    values = clear_empty_from_list(vals)
                keys = next(rdr)
                vals = next(rdr)
        return [headers, values]
    except IOError as e:
        logger.error("Cannot read file '{}'".format(input_file))
        logger.debug("Exception:\n{}".format(e))
Example #14
    def package_delete(self, data_package):
        """ Delete a package by package_title
            return True if the operation success
                   False otherwise
        """
        msg = ''

        package_title = data_package[u'title']
        package_identifier = data_package[u'identifier']
        resp = self.package_search(
            prop='identifier', value=package_identifier
        )

        if not resp:
            msg = 'Package delete: \'%s\' NOT FOUND.' % package_title
            logger.error(msg)
            return False, msg

        package_id = resp[0][u'id']

        try:
            resp = self.__conn.action.package_delete(id=package_id)
        except ckanapi.NotFound:
            msg = 'Package delete: \'%s\' NOT FOUND.' % package_title
            logger.info(msg)
            return False, msg
        except Exception as error:
            msg = 'Package delete: ERROR to execute the command for %s.: %s' % (
                package_title, error
            )
            logger.error(msg)
            return False, msg
Example #15
 def bulk_update(self):
     """ Queries SDS for all datasets and injects messages in rabbitmq.
     """
     logger.info('START bulk update')
     result_json, msg = self.query_all_datasets()
     if msg:
         logger.error('BULK update: %s', msg)
     else:
         datasets_json = result_json['results']['bindings']
         logger.info('BULK update: %s datasets found', len(datasets_json))
         rabbit = RabbitMQConnector(**rabbit_config)
         rabbit.open_connection()
         rabbit.declare_queue(self.queue_name)
         counter = 1
         for item_json in datasets_json:
             dataset_identifier = item_json['id']['value']
             dataset_url = item_json['dataset']['value']
             action = 'update'
             body = '%(action)s|%(dataset_url)s|%(dataset_identifier)s' % {
                 'action': action,
                 'dataset_url': dataset_url,
                 'dataset_identifier': dataset_identifier}
             logger.info('BULK update %s: sending \'%s\' in \'%s\'', counter, body, self.queue_name)
             rabbit.send_message(self.queue_name, body)
             counter += 1
         rabbit.close_connection()
     logger.info('DONE bulk update')
Example #16
    def get_qhull(self, neg_points=[]):
        """ From a Petri net, get its representation as a convex hull
        """
        # Create an empty convex hull
        qhull = Qhull(neg_points=neg_points)
        # The default normal for each facet
        dim = len(self.transitions)
        tmpl_normal = [0]*dim
        # Each transition corresponds to one dimension
        # transition.label -> dimension number
        transitions = self.event_dictionary
        # Each facet corresponds to one place
        # place.id -> {normal->[arc.value], offset->marking}
        facets_dict = {}
        # Iterate over the arcs
        for arc in self.arcs:
            # There should be no null arcs
            if not arc.value:
                logger.error('We found a zero arc: %s', arc)
                raise Exception('We found a zero arc: %s' % arc)
            # NOTE our internal representation of halfspaces is reversed
            # with respect to the paper (we use <= 0 instead of >= 0)
            if isinstance(arc.source, Transition):
                # If the arc leaves a transition, the coefficient is < 0
                coef = -1 * arc.value
                transition = arc.source
                place = arc.destination
            else:
                # If the arc leaves a place, the coefficient is > 0
                coef = arc.value
                place = arc.source
                transition = arc.destination
            x = transitions.setdefault(transition.label,len(transitions))
            facet = facets_dict.setdefault(place.id,{'normal':list(tmpl_normal),
                                                    'in_transitions':[],
                                                    'out_transitions':[],
                                                    'offset': -1*place.marking,
                                                    'id':place.id})
            if coef < 0:
                facet['in_transitions'].append(transition.label)
            else:
                facet['out_transitions'].append(transition.label)
            if facet['normal'][x]:
                logger.debug('Coefficient already loaded. Dummy place')
                coef = 0
            facet['normal'][x] = coef

        facets = []
        for pl_id, facet in facets_dict.items():
            # Do not create the facet for dummy places
            if not any(facet['normal']):
                continue
            # Values are always integer
            hs = Halfspace(facet['normal'], facet['offset'], integer_vals=False)
            logger.debug('Adding facet %s',hs)
            facets.append(hs)
        qhull.dim = dim
        qhull.facets = facets
        return qhull
Example #17
 def package_search(self, prop, value):
     """ Search for a package
     """
     resp = None
     try:
         resp = self.__conn.action.package_search(fq='%s:%s' % (prop, value))
     except Exception as error:
         logger.error('Error searching for package \'%s:%s\': %s' % (prop, value, error))
     return resp
Example #18
def try_read(fn):
    def wrapped(*args, **kwargs):
        try:
            return fn(*args, **kwargs)
        except NoResultFound:
            logger.info('No Query Result Found: %s' % str(kwargs))
        except MultipleResultsFound:
            logger.error('Multiple Query Results Found: %s' % str(kwargs))
    return wrapped
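A hedged usage sketch (the model and session names below are hypothetical):

@try_read
def get_user_by_email(email):
    # .one() raises NoResultFound / MultipleResultsFound,
    # which the decorator logs
    return db.session.query(User).filter_by(email=email).one()

user = get_user_by_email(email='someone@example.org')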
Example #19
def main(*args, **kwargs):
    usage = """
        Usage: ./xes_comparator_script.py <.ini config filename>

        Config file options:
     %s\n
NOTE: Do you have the needed environment variables?
    - XES : Path to .xes file with traces (for running PacH)
    - PNML : Path to .pnml file with Petri net model (for simplifying PROM models)
    - NXES : Path to .xes file with negative traces
    - PETRI : Path where simplified .pnml files should be moved to after script ends
    - STATS : Path where statistic files should be moved to after script ends
  IMPORTANT: NO PATH MUST END IN '/' (it is added automatically)
    """ % (config_options)
    if not check_argv(sys.argv, minimum=1, maximum=4):
        print(usage)
        ret = -1
    else:
        ret = 0
        try:
            config_file = sys.argv[1]
            if not config_file.endswith('.ini'):
                print(config_file, 'does not end in .ini. It should...')
                raise Exception('Filename has wrong extension')
            if not isfile(config_file):
                raise Exception("No such file")
            if '--debug' in sys.argv:
                pdb.set_trace()
            for filename, arguments in parse_config(config_file):
                comparator = ComparatorXes(filename, **arguments)
                #comparator.comparator.check_hull(log_file=filename,event_dictionary=comparator.pach.event_dictionary)
                complexity = comparator.compare(log_file=filename, event_dictionary=comparator.pach.event_dictionary)
                comparator.generate_pnml()
                comparator.generate_outputs()
                if '--verbose' in sys.argv:
                    print(complexity)
            pnml_folder, out_folder = parse_config_output(config_file)
            pwd = os.getcwd()
            for basename in os.listdir(pwd):
                if basename.endswith('.pnml'):
                    pnml_file = os.path.join(pwd, basename)
                    if os.path.isfile(pnml_file):
                        shutil.copy2(pnml_file, pnml_folder)
                        os.remove(pnml_file)
                elif basename.endswith('.out'):
                    out_file = os.path.join(pwd, basename)
                    if os.path.isfile(out_file):
                        shutil.copy2(out_file, out_folder)
                        os.remove(out_file)
        except Exception as err:
            ret = 1
            if hasattr(err, 'message'):
                print('Error: ', err.message)
            else:
                print('Error: ', err)
            logger.error('Error: %s' % err, exc_info=True)
            raise
    return ret
Example #20
def process_csv_info():
    """
    Processes the read information:
        Separate headers from webinar and participant details.
        Detect differences in participants headers and cope with them
            ( keep the longest header and add empty fields in the right
              position for participants info rows that are shorter than
              the longest header )
        Basic error checking and debug message logging.

    :return: 1 on error and 0 on success
    """
    global w_dict, w_header, w_values, w_info
    global p_header, p_values, p_headers_list

    # get headers and values for webinars
    w_header = w_info[0]
    w_values = []
    for key in w_dict:
        w_values += w_dict[key]

    # get headers and values for participants
    p_header, p_values, diffs = p_headers_list[0], [], []
    for h in p_headers_list[1:]:
        # try to find differences in participants headers 
        if len(p_header) < len(h):
            diffs = [x for x in h if x not in p_header]
            p_header = h
            break
        elif len(h) < len(p_header):
            diffs = [x for x in p_header if x not in h]
            break
    if diffs:
        diffs_pos = [p_header.index(x) for x in diffs]
    for key in p_dict:
        for row in p_dict[key]: 
            if len(row) < len(p_header):
                # handle differences in input files headers
                if not diffs:
                    logger.error("Header longer than row but no diffs detected.")
                    return 1
                for pos in diffs_pos:
                    insert_pos = int(pos)
                    row.insert(insert_pos, "")
            elif len(row) > len(p_header):
                logger.error("Participants row longer than header.Exiting...")
                logger.debug('''
webinar id:{}
final_participants_header:{}
row:{}
'''.format(key, p_header, row))
                return 1
            else:
                break
        p_values += p_dict[key]

    return 0
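For illustration, a small worked example of the header-padding logic above (all values are hypothetical):

p_header = ['Name', 'Email', 'Phone', 'Duration']
h = ['Name', 'Email', 'Duration']               # shorter header from another file
diffs = [x for x in p_header if x not in h]     # ['Phone']
diffs_pos = [p_header.index(x) for x in diffs]  # [2]
row = ['Alice', 'a@example.org', '42 min']
for pos in diffs_pos:
    row.insert(int(pos), "")
print(row)  # ['Alice', 'a@example.org', '', '42 min']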
Example #21
 def run(self, tags, rules, data):
   """Run rules (run all when "rules" is empty, otherwise run only those
      listed there) and return a list with their answers"""
   results = []
   for mod in self.rules:
     # Skip rule if we are supposed to run only specific rules and this
     # one is not the chosen one
     if len(rules) > 0 and mod.__name__ not in rules:
       logger.debug("Skipping %s because only specific rules are supposed to run" % mod.__name__)
       continue
     # Skip this rule if there is no intersection of tags we should run
     # and tags this rule should be run for
     if len([val for val in tags if val in mod.tags]) == 0:
       logger.debug("Skipping %s because it is not tagged with provided tags" % mod.__name__)
       continue
     # Finally run the rule
     func = getattr(mod, 'main')
     func_text = getattr(mod, 'text')
     name = getattr(mod, 'name')
     result = None
     used = []
     text = ''
     # Reset list of data rule used
     data.reset_access_list()
     # Now run the rule
     try:
       result = func(data)
     except DataNotAvailable:
       logger.error("Data not available for %s" % mod.__name__)
       result = False
     except:
       logger.exception("Something failed badly when executing %s" % mod.__name__)
       result = False
     logger.info("Rule %s returned %s" % (mod.__name__, result))
     # Store list of data rule has used
     used = data.get_access_list()
      # Now, if necessary, get a description of what's wrong
     if result:
       try:
         text = func_text(result)
       except:
         logger.exception("Something failed badly when getting description for %s" % mod.__name__)
     # Determine what the result was
     if result:
       status = 'FAIL'
     elif result is False:
       status = 'SKIP'
     elif result is None:
       status = 'PASS'
     else:
       logger.error("Failed to understand to result of %s" % result)
       continue
     # Save what was returned
     results.append({'label': mod.__name__, 'status': status, 'result': result, 'name': name, 'text': text, 'used': used})
   return results
Example #22
 def all_in(self, points):
     # Sanity check. Are the points inside the hull?
     # It makes things slower, especially in big cases
     logger.info('Sanity check: Are all points still valid?')
     where = self.separate(points)
     if len(where.get('outside', [])) > 0:
         logger.error('Some points are outside the hull')
         raise LostPoints('Some points are outside the hull: %s' %
                 where['outside'])
     logger.info('Sanity check passed')
     return True
Example #23
 def all_in(self, points):
     # This checks that no point is left outside;
     # it makes everything slower on big examples
     logger.info('Sanity check: Are all points still valid?')
     where = self.separate(points)
     if len(where.get('outside', [])) > 0:
         logger.error('Some points are outside the hull')
         raise LostPoints('Some points are outside the hull: %s' %
                 where['outside'])
     logger.info('Sanity check passed')
     return True
Example #24
def startPythm():
    """Start the Pythm and renice if it was requested
    """
    config = PythmConfig()
    renice_level = config.get("pythm", "renice", default=-5, dtype=int)
    if renice_level != 0:
        logger.debug("Renicing pythm to %d" % renice_level)
        try:
            os.nice(renice_level)
        except OSError as e:
            logger.error("Failed to renice: %s" % e)
Example #25
 def resource_show(self, resource_name):
     """ Get the resource by name
     """
     resp = None
     try:
         resp = self.__conn.action.resource_show(id=resource_name)
     except ckanapi.NotFound:
         logger.error('Resource \'%s\' not found.' % resource_name)
     else:
          logger.info('Resource \'%s\' found.' % resource_name)
     return resp
Example #26
 def package_show(self, package_name):
     """ Get the package by name
     """
     resp = None
     try:
         resp = self.__conn.action.package_show(id=package_name)
     except ckanapi.NotFound:
         logger.error('Get package: \'%s\' not found.' % package_name)
     else:
         logger.info('Get package: \'%s\' found.' % package_name)
     return resp
Example #27
    def tile_for(self, thing):

        if type(thing) is Being:
            tiles = [t for t in self.values() if t.being is thing]
        # else it's equipment
        else:
            tiles = [i for i in [t.inventory for t in self.values()] if thing is i]

        if len(tiles) != 1:
            logger.error('tiles %s length != 1', tiles)
            raise KeyError(tiles)
        return tiles[0]
Example #28
def lockfile(name, shared=False, retry=True):
    """
    Use the file fn as a lock file, return when the lock has been acquired.
    Returns a variable to pass to unlockfile().
    """
    config.logger.debug("take lockfile %s", name)
    dirname = os.path.dirname(name)
    mkdirhier(dirname)

    if not os.access(dirname, os.W_OK):
        logger.error("Unable to acquire lock '%s', directory is not writable",
                     name)
        sys.exit(1)

    operation = fcntl.LOCK_EX
    if shared:
        operation = fcntl.LOCK_SH
    if not retry:
        operation = operation | fcntl.LOCK_NB

    while True:
        # If we leave the lockfiles lying around there is no problem
        # but we should clean up after ourselves. This gives potential
        # for races though. To work around this, when we acquire the lock
        # we check the file we locked was still the lock file on disk.
        # by comparing inode numbers. If they don't match or the lockfile
        # no longer exists, we start again.

        # This implementation is unfair since the last person to request the
        # lock is the most likely to win it.

        # pylint: disable=broad-except
        # we disable the broad-except because we want to actually catch all possible exceptions
        try:
            lock_file = open(name, 'a+')
            fileno = lock_file.fileno()
            fcntl.flock(fileno, operation)
            statinfo = os.fstat(fileno)
            if os.path.exists(lock_file.name):
                statinfo2 = os.stat(lock_file.name)
                if statinfo.st_ino == statinfo2.st_ino:
                    return lock_file
            lock_file.close()
        except Exception as exc:
            try:
                lock_file.close()
            except Exception as exc2:
                config.logger.error("Failed to close the lockfile: %s", exc2)
            config.logger.error("Failed to acquire the lockfile: %s", exc)
        if not retry:
            return None
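The docstring refers to a matching unlockfile(); its exact definition is not shown here, but a minimal companion sketch under the same flock scheme could be:

def unlockfile(lock_file):
    """Release a lock returned by lockfile() and clean up the file."""
    try:
        # remove the file first so the next waiter sees an inode mismatch
        # and retries with a fresh lock file
        os.unlink(lock_file.name)
    except OSError:
        pass
    fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
    lock_file.close()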
Example #29
    def set_dataset_data(self, action, dataset_url, dataset_data_rdf, dataset_json):
        """ Use data from SDS in JSON format and update the ODP [#68136]
        """
        logger.info('START setting \'%s\' dataset data - \'%s\'', action, dataset_url)

        resp, msg = self.odp.call_action(action, dataset_json, dataset_data_rdf)

        if not msg:
            logger.info('DONE setting \'%s\' dataset data - \'%s\'', action, dataset_url)
            return msg
        else:
            logger.error('FAIL setting \'%s\' dataset data - \'%s\': %s',
                         action, dataset_url, msg)
            return msg
Example #30
 def tag_search(self, tag_name):
     """ Get the tag by name. It returns a dictionary like:
         {u'count': 1, u'results': [{u'vocabulary_id': None, u'id': u'tag_id', u'name': u'tag_name'}]}
     """
     resp = None
     try:
         resp = self.__conn.action.tag_search(query=tag_name)
     except ckanapi.NotFound:
         logger.error('Search tag: \'%s\' not found.' % tag_name)
     else:
         if resp[u'count']==0:
             logger.error('Search tag: \'%s\' not found.' % tag_name)
         else:
             logger.info('Search tag: \'%s\' found.' % tag_name)
     return resp
Example #31
                )

        if MODE.lower() == 'phase':
            # This mode is intended to be used for correcting the phase error
            # in your CT sensors. Please ensure that you have a purely
            # resistive load running through your CT sensors - that means no
            # electric fans and no digital circuitry!

            PF_ROUNDING_DIGITS = 3  # Controls how many decimal places the PF will be rounded to

            while True:
                try:
                    ct_num = int(
                        input(
                            "\nWhich CT number are you calibrating? Enter the number of the CT label [0 - 5]: "
                        ))
                    if ct_num not in range(0, 6):
                        logger.error(
                            "Please choose from CT numbers 0, 1, 2, 3, 4, or 5."
                        )
                    else:
                        ct_selection = f'ct{ct_num}'
                        break
                except ValueError:
                    logger.error(
                        "Please enter an integer! Acceptable choices are: 0, 1, 2, 3, 4, 5."
                    )

            cont = input(
                dedent(f"""
                #------------------------------------------------------------------------------#
                # IMPORTANT: Make sure that current transformer {ct_selection} is installed over          #
                #            a purely resistive load and that the load is turned on            #
                #            before continuing with the calibration!                           #
Example #32
def main():
    # Check for command line arguments
    # d: install_dir: {DOT_INSTALL_DIR}
    # v: verbose
    # h: help
    install_dir = None
    modules = None

    try:
        options, _ = getopt.getopt(
            sys.argv[1:], "d:vhm:",
            ["installdir=", "help", "verbose", "modules="])
    except getopt.GetoptError as err:
        print(err)
        print("Please see the help (--help).")
        exit(1)

    # Parse the command line arguments.
    for option, argument in options:
        if option in ("-v", "--verbose"):
            logger.setLevel(logging.DEBUG)
            for handler in logger.handlers:
                handler.setLevel(logging.DEBUG)
        elif option in ("-h", "--help"):
            print_help()
            exit(0)
        elif option in ("-d", "--installdir"):
            install_dir = os.path.abspath(argument)
        elif option in ("-m", "--modules"):
            if argument.lower() == "all":
                modules = list(helpers.scan_for_installers(install_dir).keys())
            else:
                modules = argument.split(",")
        else:
            assert False, "Unknown option {}.".format(option)

    if not install_dir:
        logger.fatal("Installation directory not provided. Not installing.")
        exit(1)

    if not modules or len(modules) < 1:
        logger.fatal("No modules selected.")
        exit(1)

    # Get all available modules for installation.
    available_modules = helpers.scan_for_installers(install_dir)

    # Remove the dependency modules from the list so they don't get installed twice.
    dependency_modules = []
    for module in modules:
        if "depends" in available_modules[module].keys():
            for dependency in available_modules[module]["depends"]:
                dependency_modules.append(dependency)

    for module in dependency_modules:
        if module in modules:
            modules.remove(module)

    logger.debug("Installation directory: {}".format(install_dir))

    for module in modules:
        try:
            install_module(module, install_dir, available_modules)
        except Exception as e:
            logger.error("Failed to install {}\n    {}".format(module, e))

    print("\nAll done installing!")
Example #33
def install_module(module_name,
                   dot_install_dir,
                   available_modules,
                   is_dependency=False,
                   install_dependencies=True):
    # Cannot install a module if it doesn't have an installer.
    if module_name not in available_modules.keys() and module_name.split(
            ":")[0] not in ["package", "packages"]:
        logger.error("{} is not installable.".format(module_name))
        return False

    if module_name.split(":")[0] not in ["package", "packages"]:
        module = available_modules[module_name]

    # Check if the module needs an alternate installer function.
    name_split = module_name.split(":")
    if len(name_split) > 1:
        if name_split[0] not in installer_map.keys():
            logger.critical("Installer for {} not found.".format(module_name))
            return False

        installer = installer_map[name_split[0]]
        return installer(module_name, dot_install_dir, available_modules,
                         is_dependency)

    dependency_str = " dependency" if is_dependency else ""
    logger.info("Installing{}: {}".format(dependency_str, module_name))

    # Install the module's dependencies first (if any).
    if install_dependencies:
        if "depends" in module.keys():
            logger.debug("Found dependencies for {}.".format(module_name))
            if len(module["depends"]) > 0:
                for dependency in module["depends"]:
                    if not install_module(dependency,
                                          dot_install_dir,
                                          available_modules,
                                          is_dependency=True):
                        logger.critical(
                            "{} could not install dependency {}.".format(
                                module_name, dependency))
                        return False

    # Check if the entire directory can be installed.
    if "install_dir" in module.keys():
        install_dir = module["install_dir"]
        logger.debug("[{}] Installing entire directory to {}.".format(
            module_name, install_dir))

        source_dir = helpers.get_config(module["config_dir"])
        helpers.symlink(source_dir, install_dir, is_directory=True)
    elif "config_files" in module.keys():
        for config_file in module["config_files"]:
            install_location = module["config_files"][config_file]
            logger.debug("[{}] Installing {} to {}.".format(
                module_name, config_file, install_location))

            source_file = helpers.get_config(module["config_dir"], config_file)
            helpers.symlink(source_file, install_location)
    else:
        logger.debug("[{}]: No config files to install.".format(module_name))

    # Module has been successfully installed.
    return True
Example #34
def send_task_2_worker(task_id):
    """
    定时任务响应函数,负责把任务按账号拆解成job, 并发送给最适合的队列
    :param task_id: 任务id
    :return: 成功返回True, 失败返回False
    """
    try:
        jobs = []
        time_it_beg = datetime.datetime.now()
        db_scoped_session = ScopedSession()
        task = db_scoped_session.query(
            Task.category, Task.configure, Task.limit_counts,
            Task.succeed_counts,
            Task.scheduler).filter(Task.id == task_id).first()
        if not task:
            logger.error(
                'send_task_2_worker can not find the task, id={}. '.format(
                    task_id))
            return False

        category, task_configure, limit_counts, succeed_counts, sch_id = task

        sch_mode = db_scoped_session.query(
            Scheduler.mode).filter(Scheduler.id == sch_id).first()

        # For periodic tasks, the number of jobs produced per round is
        # strictly limited; for one-off tasks, every account the user
        # specified is used
        if sch_mode[0] in [1, 2]:
            if limit_counts:
                # If the task's succeeded count exceeds the requested count,
                # or succeeded plus running jobs exceed 120% of it, there is
                # no need to keep producing jobs
                if succeed_counts >= int(limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task already finished, task id={}, succeed jobs({}) >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, limit_counts))
                    return True

                task_running_jobs = db_scoped_session.query(Job).filter(
                    and_(Job.task == task_id,
                         Job.status == 'running')).count()
                if task_running_jobs + succeed_counts >= int(
                        limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task will finish, task id={}, succeed jobs({})+running jobs({})  >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, task_running_jobs,
                                limit_counts))
                    return True

                # When a task has too many running jobs piled up, temporarily
                # stop producing new jobs
                if task_running_jobs >= 10000:
                    logger.warning(
                        'task({}) jobs num={} has reached jobs limit 10000'.
                        format(task_id, task_running_jobs))
                    return True

        # Find the task's processing function according to its category
        tcg = db_scoped_session.query(TaskCategory.processor).filter(
            TaskCategory.category == category).first()
        if not tcg:
            return False

        # Each task category maps to one processor
        task_processor = tcg[0]
        if not task_processor:
            logger.error(
                'Task(id={}) have no processor, ignore processing.'.format(
                    task_id))
            return False

        logger.info(
            '---------send_task_2_worker task id={}. --------'.format(task_id))

        # Find all the accounts of this task
        res = db_scoped_session.query(TaskAccountGroup.account_id).filter(
            TaskAccountGroup.task_id == task_id).all()
        account_ids = [x[0] for x in res]
        accounts = db_scoped_session.query(
            Account.id, Account.status, Account.account, Account.password,
            Account.email, Account.email_pwd, Account.gender,
            Account.phone_number, Account.birthday, Account.national_id,
            Account.name, Account.active_area, Account.active_browser,
            Account.profile_path,
            Account.configure).filter(Account.id.in_(account_ids)).all()

        # agents = db_scoped_session.query(Agent.id, Agent.active_area).filter(Agent.status != -1).order_by(Agent.status).all()

        # A task can have several accounts; split the task by account first
        real_accounts_num = 0
        for acc in accounts:
            acc_id, status, account, password, email, email_pwd, gender, phone_number, birthday, national_id, name, \
            active_area, active_browser_id, profile_path, account_configure = acc

            if status == 'invalid':
                logger.warning(
                    'account status in invalid. task id={}, account id={}'.
                    format(task_id, acc_id))
                continue

            area = db_scoped_session.query(Area).filter(
                Area.id == active_area).first()
            queue_name = 'default'
            area_id = None
            if area:
                area_id, queue_name = area.id, area.name
            else:
                logger.warning(
                    'There have no optimal agent for task, task id={}, account id={}, account area={}'
                    .format(task_id, acc_id, active_area))

            active_browser = db_scoped_session.query(FingerPrint.value).filter(
                FingerPrint.id == active_browser_id).first()

            if get_system_args()["force_display"] == 0:
                headless = True if get_environment() == 'pro' else False
            else:
                headless = False
            # Build the parameters required to execute the task
            inputs = {
                'system': {
                    'headless': headless
                },
                'task': {
                    'task_id': task_id,
                    'configure':
                    json.loads(task_configure) if task_configure else {},
                },
                'account': {
                    'account': account,
                    'password': password,
                    'status': status,
                    'email': email,
                    'email_pwd': email_pwd,
                    'gender': gender,
                    'phone_number': phone_number,
                    'birthday': birthday,
                    'national_id': national_id,
                    'name': name,
                    'active_area': active_area,
                    'active_browser':
                    json.loads(active_browser[0]) if active_browser else {},
                    'profile_path': profile_path,
                    'configure':
                    json.loads(account_configure) if account_configure else {}
                }
            }

            celery_task_name = "tasks.tasks.{}".format(task_processor)
            real_accounts_num += 1

            track = app.send_task(celery_task_name,
                                  args=(inputs, ),
                                  queue=queue_name,
                                  routing_key=queue_name)

            logger.info(
                '-----send sub task to worker, celery task name={}, area id={}, queue={}, '
                'task id={}, account id={}, track id={}'.format(
                    celery_task_name, area_id, queue_name, task_id, acc_id,
                    track.id))

            job = Job()
            job.task = task_id
            job.account = acc_id
            job.area = area_id
            job.status = 'running'
            job.track_id = track.id
            job.start_time = datetime.datetime.now()
            jobs.append(job)

            if sch_mode[0] in [1, 2]:
                # If the running jobs plus the jobs produced this round exceed
                # the user-requested count, break and stop producing jobs; the
                # next scheduling cycle will re-check and retry
                total_running_jobs = task_running_jobs + real_accounts_num
                if (limit_counts and total_running_jobs >= int(
                        limit_counts * 1.2)) or total_running_jobs >= 10000:
                    logger.warning(
                        'task({}) total running jobs num({}) is already more than limit counts({})*1.2'
                        .format(task_id, total_running_jobs, limit_counts))
                    break

        # Update the task status to running.
        # The number of accounts actually usable by a task changes as account
        # statuses change between polling rounds
        db_scoped_session.query(Task).filter(and_(Task.id == task_id, Task.status.in_(['new', 'pending'])))\
            .update({Task.status: "running", Task.start_time: datetime.datetime.now(),
                     Task.real_accounts_num: real_accounts_num, Task.last_update: datetime.datetime.now()}, synchronize_session=False)

        if jobs:
            db_scoped_session.add_all(jobs)

        db_scoped_session.commit()

        logger.info(
            '----send_task_2_worker send task {}, produce jobs={}, used {} seconds. '
            .format(task_id, real_accounts_num,
                    (datetime.datetime.now() - time_it_beg).seconds))
    except BaseException as e:
        logger.exception(
            'send_task_2_worker exception task id={}, e={}'.format(task_id, e))
        db_scoped_session.rollback()
    finally:
        ScopedSession.remove()

    return True
Example #35
def monitor(id, type):
    with app.app_context():
        status = 'Ran successfully but no change detected'
        global_content = None
        try:
            if type == 'html':
                task = Task.query.filter_by(id=id).first()
                url = task.url
                selector_type = task.selector_type
                selector = task.selector
                is_chrome = task.is_chrome
                regular_expression = task.regular_expression
                mail = task.mail
                wechat = task.wechat
                pushover = task.pushover
                name = task.name
                rule = task.rule
                headers = task.headers

                last = Content.query.filter_by(task_id=id,
                                               task_type=type).first()
                if not last:
                    last = Content(id)

                last_content = last.content
                content = get_content(url, is_chrome, selector_type, selector,
                                      regular_expression, headers)
                global_content = content
                status_code = is_changed(rule, content, last_content)
                logger.info(
                    'rule: {}, content: {}, last_content: {}, status_code: {}'.
                    format(rule, content, last_content, status_code))
                if status_code == 1:
                    status = 'Change detected, but no rule matched; latest value: {}'.format(content)
                    last.content = content
                    db.session.add(last)
                    db.session.commit()
                elif status_code == 2:
                    status = 'Change detected and rule matched; latest value: {}'.format(content)
                    msg = wraper_msg(content, url)
                    send_message(msg, name, mail, wechat, pushover)
                    last.content = content
                    db.session.add(last)
                    db.session.commit()
                elif status_code == 3:
                    status = 'Change detected; latest value: {}'.format(content)
                    msg = wraper_msg(content, url)
                    send_message(msg, name, mail, wechat, pushover)
                    last.content = content
                    db.session.add(last)
                    db.session.commit()
            elif type == 'rss':
                rss_task = RSSTask.query.filter_by(id=id).first()
                url = rss_task.url
                name = rss_task.name
                mail = rss_task.mail
                wechat = rss_task.wechat
                pushover = rss_task.pushover

                last = Content.query.filter_by(task_id=id,
                                               task_type=type).first()
                if not last:
                    last = Content(id, 'rss')

                last_guid = last.content
                item = get_rss_content(url)
                if item['guid'] != last_guid:
                    content = wraper_rss_msg(item)
                    # record the new guid so the PartNotificationError
                    # handler below can persist it
                    global_content = item['guid']
                    send_message(content, name, mail, wechat, pushover)
                    last.content = item['guid']
                    db.session.add(last)
                    db.session.commit()
                    status = 'Change detected; latest value: ' + item['title']

        except FunctionTimedOut:
            logger.error(traceback.format_exc())
            status = 'RSS parsing timed out'
        except PartNotificationError as e:
            logger.error(traceback.format_exc())
            status = repr(e)
            last.content = global_content
            db.session.add(last)
            db.session.commit()
        except Exception as e:
            logger.error(traceback.format_exc())
            status = repr(e)

        task_status = TaskStatus.query.filter_by(task_id=id,
                                                 task_type=type).first()
        task_status.last_run = datetime.now()
        task_status.last_status = status
        db.session.add(task_status)
        db.session.commit()