Example #1
    def prepare(self, langs=None):
        LOG.debug("bottle request.headers.keys %s", request.headers.get("Accept-Language", None))
        if langs is None:
            langs = self.get_language_list()
            LOG.debug("web client accept langs: %s", langs)

        prepared_key = tuple(langs)
        if prepared_key in self.prepared:
            trans = self.prepared.get(prepared_key)
            if trans:
                trans.install(True)
                self.app._ = trans.gettext
            else:
                self.app._ = lambda s: s
            return

        LOG.debug("setup i18n ...")
        try:
            trans = gettext.translation(self.domain, self.locale_dir, languages=langs)
            trans.install(True)
            self.app._ = trans.gettext
            self.prepared[prepared_key] = trans
        except Exception as e:
            LOG.warn('cannot install translation for languages "%s" with locale path "%s"', langs, self.locale_dir)
            LOG.warn(e)
            self.app._ = lambda s: s
            self.prepared[prepared_key] = None
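
A hypothetical driving sequence for prepare(), assuming an instance i18n of this class; the names are illustrative, not from the source:

i18n.prepare()              # negotiate languages from the Accept-Language header
i18n.prepare(langs=['ja'])  # or force a language, as myapp.set_lang() presumably does in Example #15
print(i18n.app._('test i18n in py'))  # translated if a catalog was installed, identity otherwise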
Example #2
    def set_duty_cycle(self, pin, value):
        """Set the PWM duty cycle of a registered output pin.

        :param pin: pin number registered as an output
        :param value: duty cycle in percent, clamped to the range 0-100
        :return: None
        """
        if pin not in self.output_pins:
            log.error("pin %s was not registered as an output" % pin)
            return

        output_pin = self.output_pins[pin]

        if not output_pin['pwm']:
            log.error("pwm was not registered at pin %d" % pin)
            return

        if value > 100:
            log.warning("Given duty cycle (%d) is greater than 100; clamping to 100" % value)
            value = 100
        if value < 0:
            log.warning("Given duty cycle (%d) is less than 0; clamping to 0" % value)
            value = 0

        if not output_pin['pwm_started']:
            output_pin['pwm'].start(value)
            output_pin['pwm_started'] = True
        else:
            output_pin['pwm'].ChangeDutyCycle(value)

        output_pin['dutycycle'] = value
        log.info("Dutycycle of pin %d has been set to %d" % (pin, value))
Example #3
def convex_hull(points):
    """Returns the points on the convex hull of points in CCW order."""

    # Increasing guesses for the hull size.
    for guess in (2**(2**t) for t in range(len(points))):
        LOG("Guess", guess)
        hulls = []
        for i in range(0, len(points), guess):
            # LOG(".")
            # Split the points into chunks of (roughly) the guess.
            chunk = points[i:i + guess]
            # Find the corresponding convex hull of these chunks.
            hulls.append(graham_scan(chunk))

        # Find the extreme point and initialize the list of (hull,point) with it.
        hullpt_pairs = [min_hull_pt_pair(hulls)]

        # Ensure we stop after no more than "guess" iterations.
        for __ in range(guess):
            LOG("*")
            pair = next_hull_pt_pair(hulls, hullpt_pairs[-1])
            if pair == hullpt_pairs[0]:
                # Return the points in sequence
                LOGN("o")
                return [hulls[h][i] for h, i in hullpt_pairs]
            hullpt_pairs.append(pair)
        LOGN("x")
Example #4
 def _download_video( self ):
     try:
         # spam log file
         LOG( ">>> _download_video(title: %s)" % ( repr( self.g_title ), ), heading=True )
         # get filepath and tmp_filepath
         tmppath, self.filepath = get_legal_filepath( self.g_title, self.params[ "download" ], self.settings[ "play_mode" ], self.settings[ "download_path" ], self.settings[ "use_title" ], self.settings[ "use_trailer" ] )
         # only download if the trailer doesn't exist
         if ( not os.path.isfile( self.filepath.encode( "utf-8" ) ) ):
             # only need to retrieve video if not in tmp path
             if ( not os.path.isfile( tmppath.encode( "utf-8" ) ) ):
                 # fetch the video
                 urllib.urlretrieve( self.params[ "download" ], tmppath.encode( "utf-8" ), self._report_hook )
             # create the conf file for xbox and copy to final location
             ok = self._finalize_download( tmppath )
             # if the copy failed raise an error
             if ( not ok ): raise Exception( "failed to finalize download" )
     except Exception as e:
         # oops, notify user what error occurred
         LOG( str( e ), xbmc.LOGERROR )
         # filepath is not always released immediately, we may need to try more than one attempt, sleeping between
         urllib.urlcleanup()
         remove_tries = 3
         while remove_tries and os.path.isfile( tmppath ):
             try:
                 os.remove( tmppath.encode( "utf-8" ) )
             except:
                 remove_tries -= 1
                 xbmc.sleep( 1000 )
         pDialog.close()
         self.filepath = ""
Example #5
    def _decode_multipart_body(self):
        boundary = self._get_boundary()
        if not boundary:
            LOG.warn("Message detected as multipart but boundary "
                     "declaration was not found")
            return

        start_bnd = '\n' + '--' + boundary + '\n'
        end_bnd = '\n' + '--' + boundary + '--' + '\n'

        self.body = '\n' + self.body  # for string-matching purposes

        try:
            start_index = self.body.index(start_bnd) + len(start_bnd)
        except ValueError:
            LOG.warn("Cannot find boundaries in body, "
                     "treating as single message")
            self._decode_single_body()
            return

        end_index = self.body.rfind(end_bnd)
        if end_index < 0:
            end_index = None

        content = self.body[start_index:end_index]

        parts = content.split(start_bnd)

        messages = [MessageParser(self.name, msg_content, self.headers)
                    for msg_content in parts]
        self.body = "\n".join([msg.body for msg in messages])
        for msg in messages:
            self.email_stats += msg.email_stats
            self.html_stats += msg.html_stats
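
A standalone sketch of the same boundary-splitting logic with a hypothetical boundary "XYZ"; the leading newline is prepended exactly as the method does:

body = "preamble\n--XYZ\npart one\n--XYZ\npart two\n--XYZ--\n"
start_bnd, end_bnd = "\n--XYZ\n", "\n--XYZ--\n"
body = "\n" + body  # so a boundary on the very first line still matches
content = body[body.index(start_bnd) + len(start_bnd):body.rfind(end_bnd)]
print(content.split(start_bnd))  # ['part one', 'part two']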
Example #6
async def predict_sentiment(request):
    payload = await request.json()
    x = parse_input(payload)
    if not x:
        return web.Response(
            text=
            "No or empty input received. Please post your text body as json of the form {'Text': text body string}.",
            status=400)
    elif x is KeyError:
        return web.Response(
            text=
            "Wrong input. Please post your text body as json of the form {'Text': text body string}.",
            status=400)
    elif x is ValueError:
        return web.Response(
            text=
            "Wrong input type. Please post your text body as json of the form {'Text': text body string}.",
            status=400)
    else:
        try:
            ft = get_features_from_text(x, tfidf_v)
            predicted_spam = get_prediction([ft])
            return web.json_response(
                {"spam probability": round(predicted_spam[0][0], 3)})
        except Exception as e:
            LOG.error(f"Application errored: {e.__repr__()}")
            return web.Response(text="Something has gone very wrong indeed...",
                                status=500)
Example #7
 def size(self):
     size = 0
     try:
         size = os.path.getsize(self.filepath)
     except Exception as e:
         LOG.error("Get {0}'s size error".format(self.path))
         LOG.exception(e)
     return size
Example #8
 def filtered_files(self):
     LOG.debug("%s filtered_files", self.__class__.__name__)
     ret = list()
     for k, v in self.char_table.items():
         #LOG.debug("{0} {1}".format(k, v))
         if len(v) > 1:
             ret.extend(v)
     return ret
Example #10
 def dup_files(self):
     LOG.debug("%s dup_files", self.__class__.__name__)
     ret = list()
     for k, v in self.char_table.items():
         #            LOG.debug("{0} {1}".format(k, v))
         #            LOG.debug(k)
         if len(v) > 1:
             ret.append(v)
     return ret
Example #11
    def dup_files(self):
        LOG.debug("%s dup_files", self.__class__.__name__)
        ret = list()
        for k, v in self.char_table.items():
            # LOG.debug("{0} {1}".format(k, v))
            # LOG.debug(k)
            if len(v) > 1:
                ret.append(v)
        return ret
Example #12
    def cleanup(self):
        """Switch all outputs off and release the GPIO pins."""
        log.info("Cleanup")
        self._all_output_off()
        GPIO.cleanup()
Example #13
 def find(self):
     LOG.debug("%s find", self.__class__.__name__)
     for _file in self.files:
         character = _file.size
         entry = self.char_table.get(character)
         if entry:
             entry.append(_file)
         else:
             self.char_table[character] = [_file]
         self.progress = self.progress + 1
Example #14
    def send_msg(self, sender, target, data):
        if self._check_in_black(sender, target):
            LOG.info('Packet blocked: %s->%s' % (sender, target))
            return

        queue = self.queues.get(target)
        if queue is not None:
            queue.put(data)
        else:
            raise Exception('Cannot send data')
Example #15
def detail(id):
    blogs = execute_sql('select id, title, created_time, content from blog where id =?', (id,))
    if not len(blogs):
        raise HTTPError(404, 'Blog does not exist.')
    LOG.debug('column created time type: %s', type(blogs[0]['created_time']))
    #myapp.set_lang(['jp'])
    msg = myapp._('test i18n in py')
    LOG.debug('i18n msg: %s', msg)
    myapp.set_lang(['ja'])
    return {'blog': blogs[0], 'msg':msg, '_': myapp._}
Example #17
 def handle_endtag(self, tag):
     if tag in HTML_PARSER_IGNORE_TAGS:
         return
     if self.tag_stack and tag == self.tag_stack[0]:
         del self.tag_stack[0]
     else:
         LOG.debug("Invalid closing tag at %r", self.getpos())
         if tag in self.tag_stack:
             idx = self.tag_stack.index(tag)
             del self.tag_stack[:idx + 1]
             self.stats['errors_count'] += idx + 1
Example #18
 def dump2csv(self, output_csv):
     LOG.debug("%s dump2csv", self.__class__.__name__)
     rows = list()
     for files in self.sorted_dup_files:
         data = [utils.size_renderer(files[0].size)]
         #data.append(files[0].size)
         data.extend([_file.path for _file in files])
         rows.append(data)
     with open(output_csv, 'wb') as f:
         writer = UnicodeCSVWriter(f)
         writer.writerows(rows)
Example #19
 def find(self):
     LOG.debug("%s find", self.__class__.__name__)
     for _file in self.files:
         character = _file.character
         if character == setting.UNKNOWN_SYMBOL:
             continue
         entry = self.char_table.get(character)
         if entry:
             entry.append(_file)
         else:
             self.char_table[character] = [_file]
         self.progress = self.progress + 1
Example #20
    def process_view(self, request, view_func, *view_args, **view_kwargs):
        if request.path == '/token/refresh/' and JWT_AUTH_REFRESH_COOKIE in request.COOKIES:
            if request.body != b'':
                data = json.loads(request.body)
                data['refresh'] = request.COOKIES[JWT_AUTH_REFRESH_COOKIE]
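                # Django caches the raw payload in request._body; overwriting it
                # here makes request.body return the cookie-injected JSON downstream.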
                request._body = json.dumps(data).encode('utf-8')
            else:
                LOG.info(
                    f"\n{L.FAIL} Error in api/users/middleware.py: The incoming request body must be set to an empty object.{L.ENDC}\n"
                )

        return None
Example #21
 def find(self):
     LOG.debug("%s find", self.__class__.__name__)
     for _file in self.files:
         md5sum = _file.md5sum
         if md5sum == setting.UNKNOWN_SYMBOL:
             continue
         entry = self.char_table.get(md5sum)
         if entry:
             entry.append(_file)
         else:
             self.char_table[md5sum] = [_file]
         self.progress = self.progress + 1
Example #24
    def _decode_body(self):
        if self.mime_type and (self.mime_type.startswith('image/') or
                               self.mime_type.startswith('application/')):
            LOG.info("Body marked as image, skipping body")
            self.email_stats['attached_images'] += 1
            self.body = ""
            return

        if self.is_multipart:
            LOG.debug("Detected multipart/* content-type")
            self._decode_multipart_body()
        else:
            self._decode_single_body()
Example #25
def parse_input(json_text):
    try:
        text_body = json_text["Text"]
        if not isinstance(text_body, str):
            LOG.warning(
                f"Wrong user input. User posted the following input {text_body} of type {type(text_body)}"
            )
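            # The exception class itself (not an instance) is returned as a
            # sentinel; the caller compares with `is` (see Example #6).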
            return ValueError
        return str(text_body).strip()
    except KeyError:
        LOG.warning(
            f"Wrong user input. User posted the following: {json_text}")
        return KeyError
Example #26
 def dump2file(self, output_file):
     LOG.debug("%s dump2file", self.__class__.__name__)
     if utils.get_python_version() == 3:
         fp = codecs.open(output_file, "w", "utf-8")
     else:
         fp = open(output_file, 'w')
     try:
         for files in self.sorted_dup_files:
             fp.write("================\n")
             for _file in files:
                 size = utils.size_renderer(_file.size)
                 fp.write("Size: {0}, File: {1}\n".format(size, _file.path))
     finally:
         fp.close()
Example #27
 def start_find(self):
     LOG.debug("start_find button click")
     self.disable_all()
     # start to find
     paths = [self.path_field1.get()]
     if (self.path_field2.get()):
         paths.append(self.path_field2.get())
     if (self.path_field3.get()):
         paths.append(self.path_field3.get())
     if (self.path_field4.get()):
         paths.append(self.path_field4.get())
     LOG.debug(paths)
     LOG.debug("Full Scan {0}".format(str(self.full_scan.get())))
     LOG.debug("Ouput csv {0}".format(str(self.output_csv.get())))
     do_it = messagebox.askyesno('',
                 'It may take several minutes to complete, please wait')
     if not do_it:
         self.enable_all()
         return
     self.find_complete = False
     filters = [
         core.algorithm.SizeFilter(),
         core.algorithm.CharacterFilter()
     ]
     if (self.full_scan.get()):
         filters.append(core.algorithm.FullScanner())
     dup_finder = core.dup_finder.DupFinder(paths, filters)
     self.status_thread = threading.Thread(target=self.update_status,
                                                 args=(dup_finder,))
     self.find_thread = threading.Thread(target=self.background_find,
                                                 args=(dup_finder,))
     self.find_thread.start()
     self.status_thread.start()
Example #28
def analyze_galera(net: Network):
    '''Fig2. Analysis of the Galera protocol on the `net` Network.

    This function builds a specific `filter` with the CIDR of the
    `net` and prints TCP `packets` that are related to the Galera
    communications.

    '''
    LOG.info(f'Listen packet on {ifname(net)}...')
    scapy.sniff(
        iface=ifname(net),
        count=10,  # Stop analysis after 10 packets
        filter=f'net {net["cidr"]} and tcp and port 4567',
        prn=lambda packet: packet.summary())
Example #29
 def start_find(self):
     LOG.debug("start_find button click")
     self.disable_all()
     # start to find
     paths = [self.path_field1.get()]
     if (self.path_field2.get()):
         paths.append(self.path_field2.get())
     if (self.path_field3.get()):
         paths.append(self.path_field3.get())
     if (self.path_field4.get()):
         paths.append(self.path_field4.get())
     LOG.debug(paths)
     LOG.debug("Full Scan {0}".format(str(self.full_scan.get())))
     LOG.debug("Ouput csv {0}".format(str(self.output_csv.get())))
     do_it = messagebox.askyesno(
         '', 'It may take several minutes to complete, please wait')
     if not do_it:
         self.enable_all()
         return
     self.find_complete = False
     filters = [
         core.algorithm.SizeFilter(),
         core.algorithm.CharacterFilter()
     ]
     if (self.full_scan.get()):
         filters.append(core.algorithm.FullScanner())
     dup_finder = core.dup_finder.DupFinder(paths, filters)
     self.status_thread = threading.Thread(target=self.update_status,
                                           args=(dup_finder, ))
     self.find_thread = threading.Thread(target=self.background_find,
                                         args=(dup_finder, ))
     self.find_thread.start()
     self.status_thread.start()
Example #30
 def fetch_trailers( self, category=None ):
     # spam log file
     LOG( ">>> fetch_trailers(category: %s, rating: %s, quality: %s)" % ( repr( category ), ( "G", "PG", "PG-13", "R", "NC-17", "No Limit", )[ self.settings[ "rating" ] ], self.Fanart, ), heading=True )
     ok = False
     # initialize trailers list
     trailers = []
     # fetch source
     xmlSource = self._get_xml_source()
     # parse source and add our items
     if ( xmlSource ):
         ok = self._parse_xml_source( xmlSource, category )
     # spam log file
     LOG( "<<< fetch_trailers()", heading=True )
     # return result
     return ok
Example #31
 def find(self):
     LOG.info("%s walk start", self.__class__.__name__)
     walker = Walker()
     self.__update_step(walker)
     file_instances = walker.walk(self.path_list)
     LOG.info("%s walk end", self.__class__.__name__)
     prev_filter = self.filter_list[0]
     prev_filter.set_files(file_instances)
     self.total = len(file_instances)
     self.__update_step(prev_filter)
     prev_filter.find()
     for _filter in self.filter_list[1:]:
         _filter.set_files(prev_filter.filtered_files)
         self.__update_step(_filter)
         _filter.find()
         prev_filter = _filter
Example #32
 def _get_xml_source( self ):
     try:
         xmlSource = []
         # grab all xml sources
         for source in ( "current.xml", "current_480p.xml", "current_720p.xml", ):
             # set path and url
             base_path = os.path.join( self.BASE_CURRENT_SOURCE_PATH, source )
             base_url = self.BASE_CURRENT_URL % ( source, )
             # get the source files date if it exists
             try: date = os.path.getmtime( base_path )
             except: date = 0
             # we only refresh if it's been more than a day, 24hr * 60min * 60sec
             refresh = ( ( time.time() - ( 24 * 60 * 60 ) ) >= date )
             # only fetch source if it's been more than a day
             if ( refresh ):
                 # open url
                 usock = urllib.urlopen( base_url )
             else:
                 # open path
                 usock = open( base_path, "r" )
             # read source
             xmlSource += [ usock.read() ]
             # close socket
             usock.close()
             # save the xmlSource for future parsing
             if ( refresh ):
                 ok = self._save_xml_source( xmlSource[ -1 ], base_path )
         # return source
         return xmlSource
     except Exception as e:
         # oops, notify user what error occurred
         LOG( str( e ), xbmc.LOGERROR )
         # error so return empty string
         return []
Example #33
def merge_enclosed(graph, segments):
    """Merge nodes of the given graph that are on edges that do not intersects with the given segments."""
    i = 0
    while i < len(graph):
        node = list(graph)[i]
        j = 0
        altered = False
        while j < len(graph[node]):
            neighbour = graph[node][j]
            assert (neighbour in graph)
            edge = (node, neighbour)

            if not any(
                    geometry.segment_intersection(edge, seg)
                    for seg in segments):
                graph = merge_nodes(graph, edge[0], edge[1],
                                    geometry.middle(*edge))
                altered = True
                LOG(".")
                break
            else:
                j += 1
                continue

        if altered:
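            # Restart the scan from the beginning: a merge can make edges at
            # earlier indices mergeable again.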
            i = 0
        else:
            i += 1

    return graph
Example #34
 def md5sum(self):
     ret = setting.UNKNOWN_SYMBOL
     try:
         with open(self.filepath, 'rb') as fp:
             chunk_size = 1024 * hashlib.md5().block_size
             chksum = hashlib.md5()
             while True:
                 chunk = fp.read(chunk_size)
                 if chunk:
                     chksum.update(chunk)
                 else:
                     break
         ret = chksum.hexdigest()
     except Exception as e:
         LOG.error("Get {0}'s md5sum error".format(self.path))
         LOG.exception(e)
     return ret
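
The same read loop can be written with two-argument iter(); a standalone sketch equivalent to the method above:

import hashlib

def md5sum_of(path, chunk_size=1024 * hashlib.md5().block_size):
    chksum = hashlib.md5()
    with open(path, 'rb') as fp:
        for chunk in iter(lambda: fp.read(chunk_size), b''):
            chksum.update(chunk)
    return chksum.hexdigest()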
Example #35
def do_post():
    title = request.forms.title
    content = request.forms.content
    id = request.forms.id
    if not id:
        LOG.debug('add new post...')
        created_time = datetime.now()
        modified_time = created_time
        execute_sql('insert into blog values (?,?,?,?,?)',
                        (None, title, content, created_time, modified_time))
        redirect('/')
    else:
        LOG.debug('post id is: %s', id)
        modified_time = datetime.now()
        execute_sql('update blog set title=?, content=?, last_modified_time=? where id=?',
                        (title, content, modified_time, id))
        redirect('/post/%s' % id)
Example #36
 def _parse_categories( self, xmlSource, category ):
     try:
         # encoding
         encoding = re.findall( "<\?xml version=\"[^\"]*\" encoding=\"([^\"]*)\"\?>", xmlSource[ 0 ] )[ 0 ]
         # gather all trailer records <movieinfo>
         trailers = re.findall( "<movieinfo id=\".+?\"><info>.+?<studio>(.*?)</studio>.+?<director>(.*?)</director>.+?(?:<cast>(.+?)</cast>)?<genre>(.+?)</genre>.+?</movieinfo>", xmlSource[ 0 + ( 2 * ( self.settings[ "trailer_quality" ] > 1 and self.settings[ "trailer_hd_only" ] ) ) ] )
         # use dictionary method to filter out duplicates; set our item list
         dupes = {}
         # enumerate thru the trailers list and create our category list
         for studio, directors, actors, genres in trailers:
             # genres category
             if ( category == "genres" ):
                 # parse genres 
                 genres = re.findall( "<name>(.+?)</name>", genres )
                 # filter out duplicates
                 for x in genres:
                     dupes[ x ] = ( x, "DefaultGenre.png", None, )
             elif ( category == "studios" ):
                 # filter out duplicates
                 dupes[ studio ] = ( studio, "DefaultStudios.png", None, )
             elif ( category == "directors" ):
                 # parse directors 
                 directors = directors.split( ", " )
                 # filter out duplicates
                 for x in directors:
                     dupes[ x ] = ( x, "DefaultDirector.png", None, )
             elif ( category == "actors" ):
                 # parse actors 
                 actors = re.findall( "<name>(.+?)</name>", actors )
                 # filter out duplicates
                 for x in actors:
                     dupes[ x ] = ( x, "DefaultActor.png", "special://profile/Thumbnails/Video/%s/%s" % ( xbmc.getCacheThumbName( "actor" + x )[ 0 ], xbmc.getCacheThumbName( "actor" + x ) ,), )
         # grap the categories
         categories = dupes.values()
         # sort our list
         categories.sort()
         # get our media item
         dirItem = DirectoryItem()
         # set total items
         dirItem.totalItems = len( categories )
         # set as folder since these our virtual folders to filtered lists
         dirItem.isFolder = True
         # add settings menu item
         dirItem.addContextMenuItem( "", "DUMMY TO CLEAR CONTEXT MENU" )
         # enumerate thru and add our items
         for title, icon, thumb in categories:
             # check for cached thumb (only actors)
             if ( thumb is None or not os.path.isfile( thumb ) ):
                 thumb = icon
             # create our listitem
             dirItem.listitem = xbmcgui.ListItem( title, iconImage=icon, thumbnailImage=thumb )
             # set the url
             dirItem.url = "%s?category=%s" % ( sys.argv[ 0 ], urllib.quote_plus( repr( "%s: %s" % ( category, unicode( title, "utf-8" ), ) ) ), )
             # add item
             self.MediaWindow.add( dirItem )
     except Exception as e:
         # oops, notify user what error occurred
         LOG( str( e ), xbmc.LOGERROR )
Example #37
    def set_output(self, pin, output=1):
        """Set a registered output pin to the given level, stopping any running PWM first.

        :param pin: pin number registered as an output
        :param output: GPIO level to set (1 = high, 0 = low)
        """
        if pin not in self.output_pins:
            log.error("Pin %s was not registered as an output" % pin)
            return

        output_pin = self.output_pins[pin]

        if output_pin['pwm_started']:
            output_pin['pwm'].stop()

        GPIO.output(pin, output)
        log.info("Output of pin %d has been set to %d" % (pin, output))
Example #38
 def character(self):
     chunks = list()
     size = self.size
     ret = setting.UNKNOWN_SYMBOL
     try:
         with open(self.filepath, 'rb') as f:
             for i in range(NUM_CHUNKS):
                 f.seek(int(size / NUM_CHUNKS) * i)
                 chunk = f.read(NUM_BLOCKS * hashlib.md5().block_size)
                 chunks.append(chunk)
         character_chunk = bytes()
         for chunk in chunks:
             character_chunk = character_chunk + chunk
         ret = hashlib.md5(character_chunk).hexdigest()
     except Exception as e:
         LOG.error("Get {0}'s character error".format(self.path))
         LOG.exception(e)
     return ret
Example #39
def run():
    LOG.info(f"\n{L.SUCCESS} Cleaning all job entries...{L.ENDC}")
    Job.objects.all().delete()
    LOG.info(f"{L.SUCCESS} Running the API crawler...{L.ENDC}")
    crawler()
    LOG.info(f"{L.SUCCESS} Running the scraper...{L.ENDC}\n")
    scraper()

    earliest_job = datetime.now(tzlocal()) - timedelta(days=45)
    Job.objects.filter(date__lt=earliest_job).delete()
    LOG.info(f"{L.SUCCESS} Done{L.ENDC}\n")
Example #40
def crawler():
    counter = 1
    for url_ref in config.FULL_URLS:
        resp = requests.get(url_ref)
        if resp.status_code == 200:
            _, name = get_name(url_ref)
            # Ensure folder exists
            folder_path = create_folder([config.LYRICS_FOLDER, name])
            # Get all links
            parsed_html = BeautifulSoup(resp.content, features='html.parser')
            lyrics_links = parsed_html.select('.listalbum-item a')
            LOG.info(f"Number of {name.upper()} songs: {len(lyrics_links)}")

            lyric_paths = [extract_link(link) for link in lyrics_links]

            for lyric_path in lyric_paths:

                try:
                    writer, song_name = get_name(lyric_path)
                    if name != writer:
                        alt_folder = create_folder(
                            [config.LYRICS_FOLDER, writer])
                        lyrics_file = alt_folder.joinpath(song_name + '.txt')
                        file_found = lyrics_file.is_file()
                    else:
                        writer = name
                        lyrics_file = folder_path.joinpath(song_name + '.txt')
                        file_found = lyrics_file.is_file()

                    if not file_found:
                        # url = config.BASE_URL + lyric_path
                        text = get_lyrics(lyric_path).strip()
                        LOG.info("Downloading (" + str(counter).zfill(3) +
                                 f") [{writer}]: {song_name}")
                        counter += 1

                        with open(lyrics_file, "w") as f:
                            f.write(text)
                        time.sleep(config.CRAWLER_WAIT +
                                   config.CRAWLER_WAIT * random.random())

                except IndexError:
                    LOG.error(
                        f"Access denied while scraping: {lyric_path} \n"
                        f"Try increasing the waiting time.\n"
                        f"Stopping the scraping for now. Try opening the page in your browser to unblock access."
                    )
                    return
                except Exception as err:
                    print(f"ERROR: {lyric_path}: {err}")

        else:
            LOG.warning(f"Unable to load: {url_ref}")
Example #41
    def get_language_list(self):
        if self.lang_code is not None:
            return [self.lang_code]

        expected_langs = self.extra_client_expected_langs()
        LOG.debug("web client accept langs: %s", expected_langs)

        lang_codes = []

        for lang, priority in expected_langs:
            lang_country = lang.split("-")
            if len(lang_country) == 1:
                lang_codes.append(lang)
                continue
            country = lang_country[1]
            lang_codes.append("%s_%s" % (lang_country[0], country))
            lang_codes.append("%s_%s" % (lang_country[0], country.swapcase()))

        return lang_codes
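
For illustration, assuming lang_code is None and extra_client_expected_langs() parsed the Accept-Language header into (language, priority) pairs:

# expected_langs = [("en", 1.0), ("zh-tw", 0.8)]
# get_language_list() -> ["en", "zh_tw", "zh_TW"]
# Both case variants are emitted so a catalog named either zh_tw or zh_TW can match.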
Example #42
def run_training():
    df = read_data_as_df(DATA_PATH)

    new_df = get_feature_df(df)
    tfidf_df = get_tfidf(new_df)

    X, y = preprocess_data(tfidf_df)

    X_test, y_test = X.loc[X.index == 'TEST'], y.loc[y.index == 'TEST'].values
    X_train, y_train = X.loc[(X.index == 'TRAIN') | (
        X.index == 'VALIDATION')], y.loc[(y.index == 'TRAIN') |
                                         (y.index == 'VALIDATION')].values
    LOG.info(f"Training set: {X_train.shape}, Testing set: {X_test.shape}")
    LOG.info(
        f"Training set positive examples: {y_train.sum()}, Testing set positive examples: {y_test.sum()}"
    )

    clf_d = get_trained_models(["RF", "SGD", "LR", "SVM"], X_train, y_train)
    evaluate_models(clf_d, X_train, X_test, y_train, y_test)
Example #43
def restart():
    LOG("Restart jobs")

    db[INCIDENTS_COLLECTION_NAME].drop()
    db[ADMINS_REACTION_COLLECTION_NAME].drop()
    db[SERVICES_COLLECTION_NAME].drop()
    db[ADMINS_COLLECTION_NAME].drop()
    populate_services(read_yaml(SERVICES_YAML_PATH))
    populate_admins(read_yaml(ADMINS_YAML_PATH))

    #TODO stop previous jobs
    hosts = [DKRON_ADDRESS]
    api = Dkron(hosts)

    jobs = [x['id'] for x in api.get_jobs()]
    LOG("Delete jobs")
    for job in jobs:
        api.delete_job(job)
        LOG(f'Deleted {job}')

    services = get_services()
    LOG("Start scheduling jobs")
    for service in services:
        api.apply_job({
            "schedule": f'@every { service["frequency"] }s',
            "name": str(service['_id']),
            "timezone": "Europe/Warsaw",
            "owner": "Alerting Platform",
            "executor": "shell",
            "executor_config": {
                "command": f'python3 /app/worker.py --url {service["url"]}'
            },
            "processors": {
                "log": {
                    "forward": "true"
                }
            },
            "tags": {
                "worker": "crawler:1"
            }
        })
        LOG(f'Scheduled {service["url"]}')
Example #44
def check_alive_thread():
    while True:
        # LOG.debug('Checking whether clients are online...')
        # Keep decrementing keep_alive_count; once it reaches zero, the client is considered offline
        from protocol import CLIENT_GROUP

        # Record the ids of clients that went offline
        offline_id_list = list()

        for client in CLIENT_GROUP.get_members().values():
            client.keep_alive_count -= 1
            if client.keep_alive_count == 0:
                offline_id_list.append(client.id)

        # Remove the clients that went offline
        for c_id in offline_id_list:
            if CLIENT_GROUP.remove(c_id):
                LOG.debug('Client with id {} has gone offline'.format(c_id))

        sleep(CHECK_INTERVAL)
Example #45
def main():
    path = sys.argv[1]
    LOG.info("Start to find duplicated files on {0}".format(path))

    if os.path.isfile(path):
        start_time = time.time()
        print(File(path).md5sum)
        print(File(path).size)
        end_time = time.time()
        print(end_time - start_time)
    else:
        start_time = time.time()
        filters = [
            core.algorithm.SizeFilter(),
            core.algorithm.CharacterFilter()
        ]
        dup_finder = core.dup_finder.DupFinder([path], filters)
        dup_finder.find()
        end_time = time.time()
        #dup_finder.dump2file("output.txt")
        dup_finder.dump2csv("output.csv")
        print(end_time - start_time)
        print(utils.size_renderer(dup_finder.dup_size))
Example #46
def main():
    global PROGRAM_CONFIG

    # Load the configuration
    config = load_config()
    if config:
        PROGRAM_CONFIG = config

    # Determine the log level
    if PROGRAM_CONFIG['log_level'] == 'DEBUG':
        config_logging(logging.DEBUG, PROGRAM_CONFIG['log_name'])
    elif PROGRAM_CONFIG['log_level'] == 'INFO':
        config_logging(logging.INFO, PROGRAM_CONFIG['log_name'])
    elif PROGRAM_CONFIG['log_level'] == 'ERROR':
        config_logging(logging.ERROR, PROGRAM_CONFIG['log_name'])
    elif PROGRAM_CONFIG['log_level'] == 'WARN':
        config_logging(logging.WARN, PROGRAM_CONFIG['log_name'])
    else:
        config_logging()

    server = ProxyServer(PROGRAM_CONFIG['listen_port'])

    # Thread used to check clients' online status
    check_thread = Thread(target=check_alive_thread)

    try:
        print('Starting the thread that checks clients\' online status')
        check_thread.start()

        server.start()
    except Exception as err:
        LOG.error(str(err))
    finally:
        print('Writing configuration')
        save_config(PROGRAM_CONFIG)
        print('Exiting program')
Example #47
def initial_message(msg):
    log.info('Initial Message from connected Client: %s' % msg['data'])
Example #48
def on_connect():
    log.info('Client connected!')
Example #49
def on_disconnect():
    log.info('Client disconnected!')
Example #50
def signal_handler(signal, frame):
    print("")
    log.info("Shutting down ...")
    # do some cleaning stuff
    gpio_controller.cleanup()
    sys.exit(0)
Example #51
 def set_files(self, files):
     LOG.debug("%s set_files", self.__class__.__name__)
     self.files = files
Example #52
    def _decode_single_body(self):
        self.body = self.body.strip()
        cte = self.headers.get('Content-Transfer-Encoding', '').lower()
        if 'quoted-printable' in cte:
            LOG.debug("Detected quoted-printable encoding, decoding")
            self.body = quopri.decodestring(self.body)
        if 'base64' in cte:
            LOG.debug("Detected base64 encoding, decoding")
            try:
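                # base64.decodestring is Python 2 era API; it was removed in
                # Python 3.9 in favour of base64.decodebytes.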
                self.body = base64.decodestring(self.body)
            except base64.binascii.Error:
                LOG.info("base64 decoder failed, trying partial decoding")
                self.body = base64_partial_decode(self.body)

        LOG.debug("Detected charset: %s", self.charset)
        try:
            self.body = self.body.decode(
                validate_charset(self.charset) and self.charset or 'ascii',
                'strict'
            )
        except UnicodeDecodeError:
            LOG.info('Error during strict decoding')
            self.email_stats['charset_errors'] = 1
            self.body = self.body.decode(
                validate_charset(self.charset) and self.charset or 'ascii',
                'ignore'
            )

        if self._guess_html():
            LOG.debug("Message recognized as HTML")
            self._parse_html()
        else:
            LOG.debug("Message recognized as plaintext")
Example #53
def delete(id):
    LOG.info('delete blog #%s', id)
    execute_sql('delete from blog where id =?', (id,))
    redirect('/')