Beispiel #1
0
    def cmd(self, command, args, channel, **kwargs):
        """Answer an "aww"-style command with a random link from r/aww.

        Only the commands ``aww``, ``sad``, ``depressed`` and ``morn`` are
        handled; for anything else the method returns ``None``.

        The r/aww listing is cached on the instance (``self.aww_list``) and
        fetched again when ``self.aww_updated_at`` is unset or more than 600
        seconds old.

        ``args`` may contain ``"<nick> [<channel>]"`` to redirect the reply.
        Each part is only honoured when it is longer than two characters, and
        the channel is only honoured when exactly two parts were given.

        Returns:
            A one-element list holding a ``(0, channel, nick, message)``
            tuple, or ``None`` for commands this method does not handle.
        """
        if command not in ('aww', 'sad', 'depressed', 'morn'):
            return None

        cache_is_stale = (self.aww_updated_at is None
                          or (time() - self.aww_updated_at) > 600)
        if cache_is_stale:
            url = 'https://reddit.com/r/aww/hot.json?limit=100'
            self.aww_list = json.loads(self.reddit_opener.open(url).read())

            if 'error' in self.aww_list:
                print(self.aww_list['error'])
                return [(0, channel, kwargs['from_nick'],
                         'I\'m so sorry, Reddit gave me an error. :(')]
            self.aww_updated_at = time()

        children = self.aww_list['data']['children']
        # The first entry is never picked when there is a choice (the range
        # has always started at 1).  With a single entry randint(1, 0) would
        # raise ValueError, so fall back to that only entry.
        index = randint(1, len(children) - 1) if len(children) > 1 else 0
        message = children[index]['data']['url']
        message = self.redditfix_re.sub('&', message)
        nick = kwargs['from_nick']

        if args:
            args = args.split(' ')
            if len(args[0]) > 2:
                nick = args[0]
            if len(args) == 2 and len(args[1]) > 2:
                # Accept the channel with or without its leading '#'.
                channel = args[1] if args[1][0] == '#' else '#' + args[1]

        if command == 'morn':
            message = choice(self.mornlist) + message

        try:
            return [(0, channel, to_bytes(to_unicode(nick)),
                     to_bytes(to_unicode(message)))]
        except Exception:
            # Was ``kwars['from_nick']``: a NameError that masked the real
            # conversion failure instead of sending the apology.
            return [(0, channel, kwargs['from_nick'],
                     'Couldn\'t convert to unicode. :(')]
Beispiel #2
0
 def get_spec(self):
     """Return the text of this package's RPM spec file as unicode.

     If a ``dead.package`` file exists under ``self.repo_path``, the
     contents of that file are returned instead of ``<package>.spec``.
     """
     dead_marker = 'dead.package'
     if os.path.exists(os.path.join(self.repo_path, dead_marker)):
         blob_name = dead_marker
     else:
         blob_name = self.package + '.spec'
     return to_unicode(self.repo.tree()[blob_name].data_stream.read())
Beispiel #3
0
def print_specs(specs):

    print Ut.headings("SPECIFICATIONS DATA", line=False)

    # PRINT SPECS
    for key, data in specs.items():
        if key == "target" or key == "source":
            new_line = "\n"
        else:
            new_line = ""

        if type(data) == str or type(data) == int or type(data) == unicode:
            value = to_unicode(data)  #.encode(encoding='utf-8')
        elif type(data) == float or type(data) == int:
            value = to_unicode(data)
        else:
            value = type(data)

        print "{}\t{:22}{}".format(new_line, key,
                                   "{}".format(": {}".format(to_bytes(value))))

        if type(data) == dict:
            for detail, val in data.items():
                print "\t\t{:18}: {}".format(detail, val)
            print ""
    def test_exception_to_unicode_custom(self):
        # If given custom functions, then we should not mangle -- checked
        # once with only the custom converters and once more with the default
        # EXCEPTION_CONVERTERS appended after them.
        cases = (
            ('euc_jp', 'euc_jpn', self.u_japanese),
            ('latin1', 'latin1_spanish', self.u_spanish),
        )
        for charset, exc_key, expected in cases:
            custom = [
                lambda e, charset=charset: converters.to_unicode(
                    e.args[0], encoding=charset),
                lambda e, charset=charset: converters.to_unicode(
                    e, encoding=charset),
            ]
            tools.eq_(
                converters.exception_to_unicode(self.exceptions[exc_key],
                                                converters=custom), expected)
            custom.extend(converters.EXCEPTION_CONVERTERS)
            tools.eq_(
                converters.exception_to_unicode(self.exceptions[exc_key],
                                                converters=custom), expected)
 def test_to_unicode_errors(self):
     raw = self.latin1_spanish
     # With the defaults the latin1 bytes are expected to come out mangled
     tools.eq_(converters.to_unicode(raw),
               self.u_mangled_spanish_latin1_as_utf8)
     # errors='ignore' is expected to give the "ignore" variant
     tools.eq_(converters.to_unicode(raw, errors='ignore'),
               self.u_spanish_ignore)
     # errors='strict' must raise
     tools.assert_raises(UnicodeDecodeError, converters.to_unicode,
                         raw, errors='strict')
Beispiel #6
0
 def get_spec(self):
     """Return the contents of this package's RPM spec file as unicode.

     The ``dead.package`` file is read instead when one exists under
     ``self.repo_path``.
     """
     is_dead = os.path.exists(os.path.join(self.repo_path, 'dead.package'))
     filename = 'dead.package' if is_dead else self.package + '.spec'
     raw = self.repo.tree()[filename].data_stream.read()
     return to_unicode(raw)
    def test_non_string(self):
        '''Test deprecated non_string parameter'''
        to_unicode = converters.to_unicode
        to_bytes = converters.to_bytes

        # unicode
        tools.assert_raises(TypeError, to_unicode, 5, non_string='foo')
        tools.ok_(to_unicode(5, non_string='empty') == u'')
        tools.ok_(to_unicode(5, non_string='passthru') == 5)
        tools.ok_(to_unicode(5, non_string='simplerepr') == u'5')
        tools.ok_(to_unicode(5, non_string='repr') == u'5')
        tools.assert_raises(TypeError, to_unicode, 5, non_string='strict')

        for cls in (UnicodeNoStr, StrNoUnicode, StrReturnsUnicode,
                    UnicodeReturnsStr, UnicodeStrCrossed):
            tools.ok_(to_unicode(cls(), non_string='simplerepr') == self.u_spanish)

        obj_repr = to_unicode(object, non_string='simplerepr')
        tools.ok_(obj_repr == u"<type 'object'>" and isinstance(obj_repr, unicode))

        # Bytes
        tools.ok_(to_bytes(5) == '5')
        tools.ok_(to_bytes(5, non_string='empty') == '')
        tools.ok_(to_bytes(5, non_string='passthru') == 5)
        tools.ok_(to_bytes(5, non_string='simplerepr') == '5')
        tools.ok_(to_bytes(5, non_string='repr') == '5')

        # Raise a TypeError if the msg is non_string and we're set to strict
        tools.assert_raises(TypeError, to_bytes, 5, non_string='strict')
        # Raise a TypeError if given an invalid non_string arg
        tools.assert_raises(TypeError, to_bytes, 5, non_string='INVALID')

        # No __str__ method so this returns repr
        string = to_bytes(UnicodeNoStr(), non_string='simplerepr')
        self._check_repr_bytes(string, 'UnicodeNoStr')

        # This object's __str__ returns a utf8 encoded object
        tools.ok_(to_bytes(StrNoUnicode(), non_string='simplerepr') == self.utf8_spanish)

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(to_bytes(StrReturnsUnicode(), non_string='simplerepr') == self.utf8_spanish)
        # Unless we explicitly ask for something different
        as_latin1 = to_bytes(StrReturnsUnicode(),
                             non_string='simplerepr', encoding='latin1')
        tools.ok_(as_latin1 == self.latin1_spanish)

        # This object has no __str__ so it returns repr
        string = to_bytes(UnicodeReturnsStr(), non_string='simplerepr')
        self._check_repr_bytes(string, 'UnicodeReturnsStr')

        # This object's __str__ returns unicode which to_bytes converts to utf8
        tools.ok_(to_bytes(UnicodeStrCrossed(), non_string='simplerepr') == self.utf8_spanish)

        # This object's __repr__ returns unicode which to_bytes converts to utf8
        for policy in ('simplerepr', 'repr'):
            tools.ok_(to_bytes(ReprUnicode(), non_string=policy) ==
                    u'ReprUnicode(El veloz murciélago saltó sobre el perro perezoso.)'.encode('utf8'))

        obj_repr = to_bytes(object, non_string='simplerepr')
        tools.ok_(obj_repr == "<type 'object'>" and isinstance(obj_repr, str))
Beispiel #8
0
def encoding_path(s):
    """Normalise a path string for the current operating system.

    Surrounding whitespace is stripped first.  When ``check_os()`` reports
    ``'Windows'`` the result is ``to_unicode(s, 'utf-8')``.  Otherwise the
    text is converted with ``to_unicode`` and encoded as UTF-8 using the
    ``'replace'`` error handler.

    If the encoding step fails, the error is logged as a warning and the
    unicode text is returned (previously the function fell off the end and
    returned ``None``).
    """
    s = s.strip()
    if check_os() == 'Windows':
        return to_unicode(s, 'utf-8')
    s = to_unicode(s)
    try:
        return s.encode('utf-8', 'replace')
    except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3
        log.warning(e)
    return s
Beispiel #9
0
 def listen(self, msg, channel, **kwargs):
     """Apply karma changes for every karma token ``self.reg`` finds in *msg*.

     A token starting or ending with ``++`` gives positive karma, unless the
     stripped token equals ``kwargs['from_nick']``.  A token starting or
     ending with ``--`` gives negative karma, with no such check.
     """
     for groups in self.reg.findall(msg):
         # the first non-empty capture group is the token
         non_empty = [part for part in groups if part != ""]
         token = non_empty[0]

         if token.startswith("++") or token.endswith("++"):
             target = token.strip("++")
             if target != kwargs['from_nick']:
                 self.backend(channel).positiveKarma(to_unicode(target))

         if token.startswith("--") or token.endswith("--"):
             target = token.strip("--")
             self.backend(channel).negativeKarma(to_unicode(target))
Beispiel #10
0
    def import_page(self, page, pages):
        """Create or fetch the RichTextPage for *page* and save its content.

        The page is looked up by title via ``self.get_or_create``.  Its
        ``created`` and ``updated`` fields are both set from
        ``page['post_modified']`` and its content from
        ``page['post_content']``.  *pages* is not used here.
        """
        title = to_unicode(page['post_title'])
        title_bytes = to_bytes(title)
        self.vprint("BEGIN Importing page '{0}'".format(title_bytes), 1)

        mezz_page = self.get_or_create(RichTextPage, title=title)
        modified = page['post_modified']
        mezz_page.created = modified
        mezz_page.updated = modified
        mezz_page.content = to_unicode(page['post_content'])
        mezz_page.save()

        self.vprint("END   Importing page'{0}'".format(title_bytes), 1)
Beispiel #11
0
    def import_page(self, page, pages):
        """Import one source *page* record as a RichTextPage.

        Looks the page up (or creates it) by its unicode title, sets both
        timestamps from ``post_modified``, stores ``post_content`` as the
        content and saves.  Progress is reported through ``self.vprint``.
        The *pages* argument is unused in this method.
        """
        page_title = to_unicode(page['post_title'])
        shown_title = to_bytes(page_title)
        self.vprint("BEGIN Importing page '{0}'".format(shown_title), 1)

        record = self.get_or_create(RichTextPage, title=page_title)
        record.created = record.updated = page['post_modified']
        record.content = to_unicode(page['post_content'])
        record.save()

        self.vprint("END   Importing page'{0}'".format(shown_title), 1)
Beispiel #12
0
def enconding_path(s):
    """Convert a path to the representation used on the current platform.

    * ``darwin``: the path is converted with ``to_unicode`` and then encoded
      as UTF-8 with the ``'replace'`` error handler; if encoding fails the
      unicode text is kept.
    * ``win32``: the path is converted with ``to_unicode(s, 'utf-8')``.
    * any other platform: the path is returned unchanged.
    """
    if sys.platform == "darwin":
        s = to_unicode(s)
        try:
            s = s.encode('utf-8', 'replace')
        except Exception:
            # Best effort only.  A bare ``except`` here would also swallow
            # KeyboardInterrupt and SystemExit.
            pass
    elif sys.platform == "win32":
        s = to_unicode(s, 'utf-8')
    return s
Beispiel #13
0
def to_display(s):
    """Return the stripped string *s* in a form suitable for display.

    When ``osinfo.OSInfo()`` compares equal to ``'Windows'`` the result is
    ``to_unicode(s, 'utf-8')``.  Otherwise the text is converted with
    ``to_unicode`` and encoded as UTF-8 (``'replace'`` error handler); if
    encoding raises, the unicode text is returned.
    """
    text = s.strip()
    if osinfo.OSInfo() == 'Windows':
        return to_unicode(text, 'utf-8')

    text = to_unicode(text)
    try:
        encoded = text.encode('utf-8', 'replace')
    except Exception:
        return text
    return encoded
Beispiel #14
0
    def test_to_unicode_nonstring_with_objects_that_have__unicode__and__str__(self):
        '''Test that to_unicode handles objects that have  __unicode__ and  __str__ methods'''
        candidates = [StrReturnsUnicode, UnicodeStrCrossed]
        if sys.version_info < (3, 0):
            # None of these apply on python3 because python3 does not use __unicode__
            # and it enforces __str__ returning str
            candidates = [UnicodeNoStr, StrNoUnicode, UnicodeReturnsStr] + candidates

        for cls in candidates:
            tools.eq_(converters.to_unicode(cls(), nonstring='simplerepr'), self.u_spanish)
Beispiel #15
0
def to_display(s):
    """Strip *s* and convert it for display on the current OS.

    On a match of ``osinfo.OSInfo()`` against ``'Windows'`` the unicode form
    (decoded as UTF-8) is returned; otherwise the UTF-8 encoded form is
    returned, falling back to the unicode text if encoding fails.
    """
    stripped = s.strip()
    info = osinfo.OSInfo()
    on_windows = info == 'Windows'
    if on_windows:
        return to_unicode(stripped, 'utf-8')

    as_unicode = to_unicode(stripped)
    try:
        result = as_unicode.encode('utf-8', 'replace')
    except Exception:
        result = as_unicode
    return result
Beispiel #16
0
 def test_guess_encoding_with_chardet(self):
     # Several encodings can produce the same byte sequence, so rather than
     # checking the guessed encoding's name we check that decoding with the
     # guess gives back the original unicode string.
     samples = (
         (self.utf8_spanish, self.u_spanish),
         (self.latin1_spanish, self.u_spanish),
         (self.utf8_japanese, self.u_japanese),
     )
     for raw, expected in samples:
         tools.ok_(to_unicode(raw, misc.guess_encoding(raw)) == expected)
Beispiel #17
0
 def _get_factoid_interaction(self, txn, factoid_key, location, protocol,
                              channel):
     """
     Look up a factoid using the given transaction.

     When ``location`` is given, only a row matching the key, location,
     protocol and channel exactly is accepted.  When ``location`` is None,
     every row for the key is fetched and the first match is returned in
     this order of preference: channel match, protocol match, global.

     :return: (factoid_name, [entry, entry, ...])
     :raises MissingFactoidError: when no row matches
     """
     self.logger.trace(_("Getting factoid params: factoid_key = '%s', "
                         "location = '%s', protocol = '%s', "
                         "channel = '%s'"),
                       factoid_key,
                       location,
                       protocol,
                       channel)

     if location is not None:
         txn.execute("SELECT location, protocol, channel, factoid_name, "
                     "info FROM factoids WHERE factoid_key = ? AND "
                     "location = ? AND protocol = ? AND channel = ?",
                     (
                         to_unicode(factoid_key),
                         to_unicode(location),
                         to_unicode(protocol),
                         to_unicode(channel)
                     ))
         rows = txn.fetchall()
         if len(rows) > 0:
             first = rows[0]
             return (first[3], first[4].split("\n"))
     else:
         self.logger.trace(_("Location is None - getting all factoids with "
                             "key '%s'"), factoid_key)
         txn.execute("SELECT location, protocol, channel, factoid_name, "
                     "info FROM factoids WHERE factoid_key = ?",
                     (
                         to_unicode(factoid_key),
                     ))
         rows = txn.fetchall()

         # Most specific first: channel
         hit = next((r for r in rows
                     if r[0] == self.CHANNEL and r[1] == protocol and
                     r[2] == channel), None)
         if hit is not None:
             self.logger.trace(_("Match found (channel)!"))
             return (hit[3], hit[4].split("\n"))

         # Then protocol
         hit = next((r for r in rows
                     if r[0] == self.PROTOCOL and r[1] == protocol), None)
         if hit is not None:
             self.logger.trace(_("Match found (protocol)!"))
             return (hit[3], hit[4].split("\n"))

         # Finally global
         hit = next((r for r in rows if r[0] == self.GLOBAL), None)
         if hit is not None:
             self.logger.trace(_("Match found (global)!"))
             return (hit[3], hit[4].split("\n"))

     raise MissingFactoidError(_("Factoid '%s' does not exist")
                               % factoid_key)
Beispiel #18
0
    def test_to_unicode(self):
        '''Test to_unicode when the user gives good values'''
        decode = converters.to_unicode

        # Input that is already unicode, with an encoding also passed
        tools.eq_(decode(self.u_japanese, encoding='latin1'), self.u_japanese)

        # Byte strings decoded with the default encoding
        for raw, expected in ((self.utf8_spanish, self.u_spanish),
                              (self.utf8_japanese, self.u_japanese)):
            tools.eq_(decode(raw), expected)

        # Byte strings decoded with an explicit encoding
        for raw, charset, expected in (
                (self.latin1_spanish, 'latin1', self.u_spanish),
                (self.euc_jp_japanese, 'euc_jp', self.u_japanese)):
            tools.eq_(decode(raw, encoding=charset), expected)

        # An unknown nonstring policy is rejected
        tools.assert_raises(TypeError, decode, 5, nonstring='foo')
Beispiel #19
0
    def _ugettext(self, message):
        """Return *message* as unicode, translated by the fallback if any.

        Non-string input gives ``u''``.  When ``self._fallback`` is set, its
        ``ugettext`` is tried with the decoded message; AttributeError and
        UnicodeError from that call are ignored and the untranslated message
        is used instead.
        """
        if not isbasestring(message):
            return u''

        charset = self.input_charset
        result = message
        if self._fallback:
            decoded = to_unicode(message, encoding=charset)
            try:
                result = self._fallback.ugettext(decoded)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own decoding later
                pass

        # Make sure we're returning unicode
        return to_unicode(result, encoding=charset)
Beispiel #20
0
    def ugettext(self, message):
        """Translate *message* through the fallback and return unicode.

        Returns ``u''`` when *message* is not a ``basestring``.  Without a
        fallback, or when the fallback's ``ugettext`` raises AttributeError
        or UnicodeError, the original message is returned as unicode.
        """
        if not isinstance(message, basestring):
            return u''

        input_charset = self.input_charset
        translated = message
        if self._fallback:
            as_unicode = to_unicode(message, encoding=input_charset)
            try:
                translated = self._fallback.ugettext(as_unicode)
            except (AttributeError, UnicodeError):
                # Ignore UnicodeErrors: We'll do our own decoding later
                pass

        # Make sure we're returning unicode
        return to_unicode(translated, encoding=input_charset)
Beispiel #21
0
 def assign(self, **kwargs):
     """Copy the recognised keyword arguments onto this object.

     ``id`` and ``creator`` are stored as given; ``eid``, ``caption``,
     ``description`` and ``alias`` are stored via ``to_unicode``;
     ``creation_time`` is read as a float timestamp and stored as a
     ``datetime``.  Keys that are absent leave the attribute untouched.

     Returns ``self`` so calls can be chained.
     """
     if 'id' in kwargs:
         self.id = kwargs['id']

     # Text fields, in the order they have always been assigned.
     # ('suggestion' is intentionally not handled.)
     for field in ('eid', 'caption', 'description', 'alias'):
         if field in kwargs:
             setattr(self, field, to_unicode(kwargs[field]))

     if 'creation_time' in kwargs:
         timestamp = float(kwargs['creation_time'])
         self.creation_time = datetime.fromtimestamp(timestamp)

     if 'creator' in kwargs:
         self.creator = kwargs['creator']
     return self
Beispiel #22
0
    def test_normalize(self, string_in, string_out, regex):
        """
        Unit testing function for paideia_utils.sanitize_greek()

        """
        print 'string in', string_in
        print 'expected string out', string_out
        actual = GreekNormalizer().normalize(string_in)
        print 'actual string out', actual
        assert actual == string_out
        assert isinstance(actual, unicode)
        regex1 = re.compile(to_unicode(regex), re.I | re.U)
        assert re.match(regex1, to_unicode(actual))
Beispiel #23
0
    def __init__(self,entryurl, gApi = None):
        """Build an entry from the HTML table found at *entryurl*.

        Args:
            entryurl: URL of the entry; stored as ``self.entryurl`` before
                the table is loaded through ``self.getHTMLTable()``.
            gApi: optional API key passed to ``defineApi``; when it is None,
                ``defineApi()`` is called without arguments.

        The table fields are copied to attributes, the "Keyword(s)" and
        "Subject(s)" fields are merged into ``self.keywords`` and, when an
        abstract exists, its language is identified.  Non-English abstracts
        and keywords are translated to English (``abstractEN`` /
        ``keywordsEN``).
        """
        # check for google api key, request if necessary
        if gApi is not None:
            self.gApi = defineApi(gApi)
        else:
            self.gApi = defineApi()

        self.entryurl = entryurl
        table = self.getHTMLTable()
        fieldList = self.getFieldList(table)
        entryList = self.getEntryList(table)

        # NOTE(review): several labels look truncated ('Title of tex',
        # 'Abstrac', ...) -- presumably getEntry matches them as prefixes;
        # confirm against getEntry before "fixing" them.
        self.ecolex_id = self.getEntry('Legislation ID number',fieldList,entryList)
        self.name = self.getEntry('Title of tex',fieldList,entryList)
        self.country = self.getEntry('Country',fieldList,entryList)
        self.date = self.getEntry('Date of tex',fieldList,entryList)
        self.legtype = self.getEntry('Type of documen',fieldList,entryList)
        self.source = self.getEntry('Source',fieldList,entryList)
        self.fulltext = self.getUrl('Link to full tex',fieldList,entryList)
        self.abstract = self.getEntry('Abstrac',fieldList,entryList)

        # concatenate subject and keywords only if there are entries
        keywordsA = self.getEntry('Keyword(s)',fieldList,entryList)
        keywordsB = self.getEntry('Subject(s)',fieldList,entryList)
        present = [kw for kw in (keywordsA, keywordsB) if kw is not None]
        self.keywords = '; '.join(present) if present else None

        # check language and translate keywords and abstract if not english
        if self.abstract is None:
            self.language = None
            self.abstractEN = None
            self.keywordsEN = None
            return

        # the first five words are used as the language sample
        languageSample = ' '.join(self.abstract.split(' ')[0:5])
        self.language = identify(languageSample,self.gApi)

        if self.language == 'en':
            self.abstractEN = self.abstract
            self.keywordsEN = self.keywords
            return

        translationAB = translate(self.abstract,self.language,'en',self.gApi)
        self.abstractEN = to_unicode(translationAB)
        # Translate the keywords themselves: this used to translate the
        # abstract a second time, so keywordsEN duplicated abstractEN.
        if self.keywords is not None:
            translationKW = translate(self.keywords,self.language,'en',self.gApi)
            self.keywordsEN = to_unicode(translationKW)
        else:
            self.keywordsEN = None
Beispiel #24
0
def populate(comps='comps-f16', do_dependencies=True):
    from yum.comps import Comps

    session = DBSession()

    c = Comps()
    c.add('comps/%s.xml' % comps)

    for group in c.groups:
        g = Group(id=group.groupid,
                  name=group.name,
                  description=group.description)
        session.add(g)

        for package in group.packages:
            p = session.query(Package).filter_by(
                name=to_unicode(package)).first()
            if not p:
                p = Package(name=package)
                session.add(p)
            p.group = g

        session.flush()

    root = Root(name=u'Fedora')
    session.add(root)
    session.flush()

    for category in c.categories:
        c = Category(id=category.categoryid,
                     name=category.name,
                     description=category.description)
        session.add(c)
        root.categories.append(c)
        for group in category.groups:
            g = session.query(Group).filter_by(
                group_id=to_unicode(group)).first()
            if not g:
                print "Cannot find group: %s" % group
            else:
                g.category = c

        session.flush()

    if do_dependencies:
        for package in session.query(Package).all():
            add_dependencies(package, session)

    session.commit()
Beispiel #25
0
    def test_exception_to_unicode_custom(self):
        # If given custom functions, then we should not mangle -- with the
        # custom converter alone and with the defaults appended after it.
        cases = (
            ('euc_jp', 'euc_jpn', self.u_japanese),
            ('latin1', 'latin1_spanish', self.u_spanish),
        )
        for charset, exc_key, expected in cases:
            custom = [lambda e, charset=charset:
                      converters.to_unicode(e, encoding=charset)]
            tools.ok_(converters.exception_to_unicode(
                self.exceptions[exc_key], converters=custom) == expected)
            custom.extend(converters.EXCEPTION_CONVERTERS)
            tools.ok_(converters.exception_to_unicode(
                self.exceptions[exc_key], converters=custom) == expected)
Beispiel #26
0
 def _get_user_txn(self, txn, user, protocol):
     """Fetch the ``users`` row for *user* on *protocol*.

     The user name is lower-cased and converted to unicode before the
     lookup.  Returns whatever ``txn.fetchone()`` yields.
     """
     username = to_unicode(user.lower())
     txn.execute(u"SELECT * FROM users WHERE user=? AND protocol=?",
                 (username, protocol))
     return txn.fetchone()
Beispiel #27
0
def brute_txt(fn):
    """
    Convert the file at *fn* to text, picking a converter by its extension.

    Returns '' when the path does not exist.  ``.txt`` files are read as
    bytes and returned through ``to_unicode`` directly, without the clean-up
    applied to the other formats.  ``epub``, ``xml``/``html``/``htm`` and
    ``pdf`` use their own converters; anything else goes through
    ``fulltext.get`` (returning '' if that yields nothing).  The text then
    has ``\\xa0`` replaced by spaces and is passed to ``clean_gutenberg`` when
    it mentions 'project gutenberg ebook'.
    """
    if not os.path.exists(fn):
        print('! No filename found')
        return ''

    # extension without the leading dot
    ext = os.path.splitext(fn)[-1][1:]

    if ext == 'txt':
        with open(fn, 'rb') as f:
            return to_unicode(f.read())

    if ext == 'epub':
        txt = epub2txt(fn)
    elif ext in ('xml', 'html', 'htm'):
        with open(fn) as f:
            txt = xml2txt(f.read(), CONTENT_TAGS[ext])
    elif ext == 'pdf':
        txt = pdf2txt(fn)
    else:
        import fulltext
        txt = fulltext.get(fn)
        if not txt:
            return ''

    # clean
    txt = txt.replace('\xa0', ' ')
    if 'project gutenberg ebook' in txt.lower():
        txt = clean_gutenberg(txt)
    return txt
    def lngettext(self, msgid1, msgid2, n):
        """Return the plural-aware translation of a message as a byte string.

        The default is *msgid1* when ``n == 1`` and *msgid2* otherwise.  A
        non-string *msgid1* gives ``''``.  The catalog is tried first; on a
        KeyError the fallback's ``ngettext`` is tried, ignoring UnicodeError.
        The result is encoded with ``self._output_charset`` when set,
        otherwise with the locale's preferred encoding.
        """
        tmsg = msgid1 if n == 1 else msgid2

        if not isinstance(msgid1, basestring):
            return ''

        msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(msgid1, self.plural(n))]
        except KeyError:
            if self._fallback:
                try:
                    tmsg = self._fallback.ngettext(msgid1, msgid2, n)
                except UnicodeError:
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Make sure that we're returning a str
        charset = self._output_charset
        if not charset:
            charset = locale.getpreferredencoding()
        return to_bytes(tmsg, encoding=charset, nonstring='empty')
Beispiel #29
0
    def translate_command(self, protocol, caller, source, command, raw_args,
                          parsed_args):
        """Translate the given text and send the result to *source*.

        ``parsed_args[0]`` is either ``<to>`` or ``<from>:<to>``; the
        remaining arguments are joined with spaces to form the text.  With
        fewer than two arguments a usage message is sent to *caller*.
        Translation failures are reported to *source*; exceptions other than
        ``Error`` are also logged.
        """
        if len(parsed_args) < 2:
            usage = "Usage: {CHARS}" + command + " <languages> <text>"
            caller.respond(usage)
            return

        langs = parsed_args[0]
        text = u" ".join(to_unicode(word) for word in parsed_args[1:])

        from_lang, to_lang = u"", langs
        if u":" in langs:
            from_lang, to_lang = langs.split(u":")[:2]

        try:
            translation = self.goslate.translate(text, to_lang, from_lang)

            source.respond(u"[{}] {}".format(to_lang, translation))
        except Error as e:
            source.respond(u"Translation error: {}".format(e))
        except Exception as e:
            self.logger.exception("Translation error")
            source.respond(u"Translation error: {}".format(e))
Beispiel #30
0
    def _insert_or_update_user(self, txn, user, protocol):
        """Record the current time for *user* on *protocol*.

        The user name is lower-cased and converted to unicode.  If no row
        exists one is inserted and ``False`` is returned; otherwise the
        row's ``at`` column is updated and ``True`` is returned.
        """
        username = to_unicode(user.lower())

        txn.execute("SELECT * FROM users WHERE user=? AND protocol=?",
                    (username, protocol))
        existing = txn.fetchone()

        now = time.time()

        if existing is not None:
            txn.execute(
                "UPDATE users SET at=? WHERE user=? AND protocol=?",
                (now, username, protocol)
            )
            return True

        txn.execute(
            "INSERT INTO users VALUES (?, ?, ?)",
            (username, protocol, now)
        )
        return False
Beispiel #31
0
def main(argv):
    """Convert a srcYUML input file into a graphViz ``digraph`` file.

    Args:
        argv: ``[program, inputFile]`` or ``[program, inputFile,
            outputFile]``.  With two items the output goes to
            ``output.txt``; with any other length a usage message is printed
            and the process exits with status 1.

    The graph header is written first, then the input is parsed and the
    parse tree is walked with a ``Relay`` listener that was given the
    output file.  Both files are closed even if parsing or walking raises.
    """
    import sys

    if len(argv) == 3:
        output_path = argv[2]

    elif len(argv) == 2:
        print("Default output file used: \"output.txt\"")
        output_path = "output.txt"

    else:
        print("Usage: ./srcYUML2graphViz.py [inputFile] [outputFile]")
        sys.exit(1)

    with open(output_path, 'w') as output:
        output.write("digraph hierarchy {\nsize=\"5, 5\"\n")
        output.write("node[shape=record,style=filled,fillcolor=gray95]\n")
        output.write(
            "edge[dir=\"both\", arrowtail=\"empty\", arrowhead=\"empty\", labeldistance=\"2.0\"]\n"
        )

        # read the raw bytes and decode them before handing them to the lexer
        with open(argv[1], "rb") as source_file:
            input_str = to_unicode(source_file.read())

        input_stream = InputStream(input_str)
        lexer = srcYUML2graphVizLexer(input_stream)
        stream = CommonTokenStream(lexer)
        parser = srcYUML2graphVizParser(stream)
        tree = parser.yuml()
        relay = Relay(output)  # realization of Listener
        walker = ParseTreeWalker()
        walker.walk(relay, tree)
Beispiel #32
0
    def _ugettext(self, message):
        """Return the unicode translation of *message*.

        Non-string input gives ``u''``.  The catalog is consulted first; on
        a KeyError the fallback's ``ugettext`` is tried, and AttributeError
        or UnicodeError from it are ignored, leaving the decoded original
        message.
        """
        if not isbasestring(message):
            return u''

        charset = self.input_charset
        text = to_unicode(message, encoding=charset)
        try:
            translated = self._catalog[text] #pylint:disable-msg=E1101
        except KeyError:
            translated = text
            if self._fallback:
                try:
                    translated = self._fallback.ugettext(text)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Make sure that we're returning unicode
        return to_unicode(translated, encoding=charset)
Beispiel #33
0
    def _lngettext(self, msgid1, msgid2, n):
        """Return the plural-aware translation re-encoded for output.

        Defaults to *msgid1* when ``n == 1`` and *msgid2* otherwise; a
        non-string *msgid1* gives ``''``.  The catalog is looked up with the
        unicode form of *msgid1*; on a KeyError the fallback's ``lngettext``
        is tried with the original arguments.  The output encoding is
        ``self._output_charset`` or, failing that, the locale's preferred
        encoding.
        """
        tmsg = msgid1 if n == 1 else msgid2

        if not isbasestring(msgid1):
            return ''

        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            fallback = self._fallback
            if fallback:
                try:
                    tmsg = fallback.lngettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # Ignore UnicodeErrors: We'll do our own encoding next
                    pass

        # Next decide what encoding to use for the strings we return
        output_encoding = self._output_charset
        if not output_encoding:
            output_encoding = locale.getpreferredencoding()

        return self._reencode_if_necessary(tmsg, output_encoding)
Beispiel #34
0
    def update_details(self, bug: typing.Union[typing.Any, None],
                       bug_entity: 'models.Bug'):
        """
        Bring bug_entity in line with the matching Bugzilla bug.

        A fault with code 102 marks the entity as a private bug; any other
        fault sets the title to 'Invalid bug number'.  In both cases, and
        for any other exception while fetching, nothing else is updated.

        Args:
            bug: The Bugzilla Bug to copy details from. If falsy, the bug is
                 fetched from Bugzilla using bug_entity.bug_id.
            bug_entity: The bug we wish to update.
        """
        if not bug:
            try:
                bug = self.bz.getbug(bug_entity.bug_id)
            except xmlrpc_client.Fault as err:
                if err.faultCode != 102:
                    bug_entity.title = 'Invalid bug number'
                    log.error("Got fault from Bugzilla: fault code: %d, fault string: %s" % (
                        err.faultCode, err.faultString))
                    return
                bug_entity.title = 'Private bug'
                bug_entity.private = True
                log.info("Marked bug #" + str(bug_entity.bug_id) + " as private.")
                return
            except Exception:
                log.exception("Unknown exception from Bugzilla")
                return

        if bug.product == 'Security Response':
            bug_entity.parent = True
        bug_entity.title = to_unicode(bug.short_desc)

        keywords = bug.keywords
        if isinstance(keywords, str):
            # a plain string of keywords; otherwise (python-bugzilla 0.8.0+)
            # the value is used as it is
            keywords = keywords.split()

        lowered = [keyword.lower() for keyword in keywords]
        if 'security' in lowered:
            bug_entity.security = True
Beispiel #35
0
    def _reencode_if_necessary(self, message, output_encoding):
        '''Return a byte string that's valid in a specific charset.

        A message that is already a valid byte string in *output_encoding*
        is returned unchanged.  Otherwise it is decoded using
        ``self.input_charset`` and encoded to *output_encoding*; a message
        that is not a string gives ``''``.

        .. warning:: This method may mangle the message if the input encoding
            is not known or the message isn't representable in the chosen
            output encoding.
        '''
        try:
            already_valid = byte_string_valid_encoding(message, output_encoding)
        except TypeError:
            # input was unicode, so it needs to be encoded
            already_valid = False

        if already_valid:
            return message

        try:
            # Decode to unicode so we can re-encode to desired encoding
            decoded = to_unicode(message, encoding=self.input_charset,
                    nonstring='strict')
        except TypeError:
            # Not a string; return an empty byte string
            return ''

        # Make sure that we're returning a str of the desired encoding
        return to_bytes(decoded, encoding=output_encoding)
Beispiel #36
0
 def _get_user_txn(self, txn, user, protocol):
     """Look up one ``users`` row by lower-cased unicode name and protocol.

     Returns the result of ``txn.fetchone()`` after running the query.
     """
     lookup_name = to_unicode(user.lower())
     params = (lookup_name, protocol)
     txn.execute(u"SELECT * FROM users WHERE user=? AND protocol=?",
                 params)
     row = txn.fetchone()
     return row
Beispiel #37
0
def process_input(text, stop_word, stop_symbols_string):
    """Clean up ``text`` and return it as unicode.

    The text is lower-cased and converted to bytes, passed through
    ``remove_info_in_bracket``, run through ``remove_stop_words`` when
    ``stop_word`` is non-empty, and stripped of every character listed in
    ``stop_symbols_string`` when that is given.  If anything raises, the
    error is printed and the original ``text`` is returned unchanged.
    """
    try:
        cleaned = to_bytes(text.lower())

        # Drop the bracketed "(....)" parts of the value.
        cleaned = remove_info_in_bracket(cleaned)

        # Drop the stop words, if any were supplied.
        if len(stop_word) > 0:
            cleaned = remove_stop_words(cleaned, stop_word)

        # Drop unwanted symbols/characters through a regex character class.
        if stop_symbols_string is not None and len(stop_symbols_string) > 0:
            symbols = str(stop_symbols_string).strip()
            char_class = str("[{}]".format(symbols)).replace(" ", "")
            cleaned = re.sub(char_class, "", cleaned)

        return to_unicode(cleaned)

    except Exception as error:
        print("!!!!!!!!!!!!! PROBLEM !!!!!!!!!!!!!!!!!!!")
        print(str(error.message))
        return text
Beispiel #38
0
def textPreprocess(text):
    """Split ``text`` into sentences of normalised tokens.

    The text is converted to unicode, lower-cased and split into sentences
    with the NLTK punkt tokenizer.  Each sentence is split into tokens and
    then scanned for phrases of 5 down to 1 tokens: a phrase found in the
    module-level ``medical`` dictionary is replaced by its mapped value, and
    single tokens that are not in the dictionary are stemmed with the
    Snowball English stemmer.

    Args:
        text: The raw text to process.

    Returns:
        A list with one entry per sentence, each entry a list of tokens.
    """
    # Load the dictionary of the specialist lexicon on first use.
    global medical
    if not medical:
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe while medical.pkl is a trusted, project-controlled file.
        # The context manager closes the handle even if unpickling fails.
        with open('./dictionary_files/medical.pkl', 'r') as lexicon_file:
            medical = pickle.load(lexicon_file)

    # Force all the text to be of the same type (deals with accented
    # letters), then convert to lower case.
    text = to_unicode(text).lower()
    # Split text into sentences
    sentence_token = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = sentence_token.tokenize(text.strip())

    # One stemmer serves every token; building one per token is wasted work.
    stemmer = stem.snowball.EnglishStemmer()

    processed_sentences = []
    for sentence in sentences:
        # Split on non alphanumeric and non hyphen characters and keep the
        # delimiters (the capture group makes re.split return them).
        # NOTE(review): "\b" in this non-raw literal is a backspace
        # character, not a regex word boundary.  The pattern is left
        # unchanged on purpose -- confirm the intent before touching it.
        tokens = re.split("([^\w\-]+)||\b", sentence)
        # Strip spaces from the tokens and drop the ones that end up empty.
        tokens = [word.replace(' ', '') for word in tokens]
        tokens = [word for word in tokens if word]

        # Look up variable length sequences of words in the medical
        # dictionary, longest phrases (5 tokens) first.
        for phrase_len in range(5, 0, -1):
            merged = []
            start = 0
            # Check each phrase of phrase_len tokens while enough tokens
            # remain after the current position.
            while start <= len(tokens) - phrase_len:
                phrase = tokens[start]
                for following in tokens[start + 1:start + phrase_len]:
                    phrase = phrase + " " + following
                if phrase in medical:
                    # Collapse the phrase into the single specialist token
                    # and skip past the tokens it consumed.
                    merged.append(medical[phrase])
                    start += phrase_len
                elif phrase_len == 1:
                    # Individual token not in the lexicon: keep its stem.
                    merged.append(stemmer.stem(phrase))
                    start += 1
                else:
                    # Token does not start a known phrase: keep it as is.
                    merged.append(tokens[start])
                    start += 1
            # Keep the trailing tokens that are too few to form a phrase.
            merged.extend(tokens[start:])
            tokens = merged
        processed_sentences.append(tokens)

    return processed_sentences
Beispiel #39
0
 def test_guess_encoding_with_chardet_uninstalled(self):
     '''Without chardet, decoding euc_jp bytes with the guessed encoding
     must give the mangled latin1 fixture.'''
     if chardet:
         raise SkipTest('chardet installed, euc_jp will not be mangled')

     decoded = to_unicode(self.euc_jp_japanese,
                          misc.guess_encoding(self.euc_jp_japanese))
     tools.ok_(decoded == self.u_mangled_euc_jp_as_latin1)
Beispiel #40
0
    def from_files(cls, filenames, *args, **kwds):
        """Build an instance of ``cls`` from one or more CSV verbatim files.

        For every file the CSV header is inspected to pick the id, code and
        text columns by name; each data row then becomes a ``Verbatim``.
        Extra positional and keyword arguments are forwarded to ``cls``.
        """
        cls.logger.info('Getting verbatims from file(s):\t%s...', filenames)
        collected = []
        for path in filenames:
            with open(path) as handle:
                reader = csv.DictReader(handle)

                # Pick the relevant columns from the header names.
                id_field = code_field = text_field = None
                for name in reader.fieldnames:
                    if re.search(r'(verb|dc).*id', name, re.I):
                        id_field = name
                        continue

                    if re.search(r'\b(code|label)', name, re.I):
                        # Only a column without a number, or numbered 1,
                        # is used as the code column.
                        number = re.search(r'\d+', name)
                        if not number or int(number.group(0)) == 1:
                            code_field = name
                        continue

                    if re.search(r'(verb)?.*(text|original)',
                                 name, re.I):
                        text_field = name

                # The row index stands in for a missing id column.
                for index, row in enumerate(reader):
                    collected.append(Verbatim(
                        sql_id=row.get(id_field, index),
                        code=row.get(code_field, None),
                        text=to_unicode(row[text_field])))

        verbs = cls(collected, *args, **kwds)
        cls.logger.debug('Retrieved %d verbatims', len(verbs))
        return verbs
Beispiel #41
0
 def test_guess_encoding_with_chardet(self):
     '''Decoding with the guessed encoding must give back the original
     unicode string.'''
     # Several encodings can produce the same byte sequence, so rather than
     # comparing encoding names we check that decoding with the guess
     # round-trips to the expected unicode text.
     cases = (
         (self.utf8_spanish, self.u_spanish),
         (self.latin1_spanish, self.u_spanish),
         (self.utf8_japanese, self.u_japanese),
     )
     for raw, expected in cases:
         tools.ok_(to_unicode(raw, misc.guess_encoding(raw)) == expected)
Beispiel #42
0
def main(argv):
	"""Convert a srcYUML input file into a GraphViz ``digraph`` file.

	Args:
		argv: Command-line style argument list.  ``argv[1]`` is the input
			file; ``argv[2]``, when present, is the output file.  With
			only an input file, "output.txt" is used.

	Raises:
		SystemExit: With status 1 when the argument count is not 2 or 3.
	"""
	# Local import keeps this block self-contained; sys.exit is used instead
	# of the interactive ``exit`` helper, which is only installed by ``site``.
	import sys

	if len(argv) == 3:
		output_path = argv[2]
	elif len(argv) == 2:
		print("Default output file used: \"output.txt\"")
		output_path = "output.txt"
	else:
		print("Usage: ./srcYUML2graphViz.py [inputFile] [outputFile]")
		sys.exit(1)

	# The context managers close both files even if lexing, parsing or the
	# tree walk raises.
	with open(output_path, 'w') as output:
		output.write("digraph hierarchy {\nsize=\"5, 5\"\n")
		output.write("node[shape=record,style=filled,fillcolor=gray95]\n")
		output.write("edge[dir=\"both\", arrowtail=\"empty\", arrowhead=\"empty\", labeldistance=\"2.0\"]\n")

		# Read the source as raw bytes and decode it to unicode for the lexer.
		with open(argv[1], "rb") as source:
			input_str = to_unicode(source.read())

		char_stream = InputStream(input_str)
		lexer = srcYUML2graphVizLexer(char_stream)
		stream = CommonTokenStream(lexer)
		parser = srcYUML2graphVizParser(stream)
		tree = parser.yuml()
		# Relay is the listener implementation; it is handed the output file.
		relay = Relay(output)
		walker = ParseTreeWalker()
		walker.walk(relay, tree)
Beispiel #43
0
    def update_details(self, bug, bug_entity):
        """
        Sync the details stored on bug_entity with the matching Bugzilla bug.

        Args:
            bug (bugzilla.bug.Bug or None): The Bugzilla Bug we will use to update our own Bug
                object from. If None, bug_entity.bug_id will be used to fetch the object from
                Bugzilla.
            bug_entity(bodhi.server.models.Bug): The bug we wish to update.
        """
        if not bug:
            try:
                bug = self.bz.getbug(bug_entity.bug_id)
            except xmlrpc_client.Fault:
                # A fault marks the title as invalid; nothing else is updated.
                bug_entity.title = 'Invalid bug number'
                log.exception("Got fault from Bugzilla")
                return
            except Exception:
                log.exception("Unknown exception from Bugzilla")
                return

        if bug.product == 'Security Response':
            bug_entity.parent = True

        bug_entity.title = to_unicode(bug.short_desc)

        # Keywords arrive either as one whitespace separated string or, with
        # python-bugzilla 0.8.0+, as a sequence already.
        keywords = bug.keywords
        if isinstance(keywords, six.string_types):
            keywords = keywords.split()

        lowered = [keyword.lower() for keyword in keywords]
        if 'security' in lowered:
            bug_entity.security = True
Beispiel #44
0
def import_koji_pkgs():
    """ Get the latest packages from koji.  These might not have made it into
    yum yet, so we won't even check for their summary until later.

    Packages for which koji returns a config under the el6-docs tag are
    skipped.  Every other package that ``m.Package.by_name`` cannot find is
    added to ``ft.SESSION`` with an empty summary.
    """
    log.info("Importing koji packages")
    import koji
    session = koji.ClientSession("https://koji.fedoraproject.org/kojihub")
    count = 0
    tagbp = 230 # id of el6-docs tag to bypass
    packages = session.listPackages()
    log.info("Looking through %i packages from koji." % len(packages))
    for package in packages:
        name = to_unicode(package['package_name'])
        pkg_tagstatus = session.getPackageConfig(tagbp, package['package_id'])
        if pkg_tagstatus is not None:
            # The message must be formatted *before* it is handed to
            # log.info; applying % to the call's None result raises TypeError.
            log.info("Package %s is tagged with el6-docs and will be skipped"
                     % name)
            continue # skipping if the package is tagged
        try:
            # Called only for the NoResultFound it raises on unknown names.
            m.Package.by_name(ft.SESSION, name)
        except NoResultFound:
            log.debug(name + ' -')
            count += 1
            ft.SESSION.add(m.Package(name=name, summary=u''))

    log.info("Got %i new packages from koji (with no summaries yet)" % count)
Beispiel #45
0
    def ugettext(self, message):
        """Return the unicode translation of ``message``.

        Non-string input gives ``u''``.  The message is looked up in the
        catalog; on a miss the fallback (if any) is asked, and if that
        fails too the untranslated message is returned as unicode.
        """
        if not isinstance(message, basestring):
            return u''

        message = to_unicode(message, encoding=self.input_charset)
        try:
            message = self._catalog[message]  #pylint:disable-msg=E1101
        except KeyError:
            fallback = self._fallback
            if fallback:
                try:
                    message = fallback.ugettext(message)
                except (AttributeError, UnicodeError):
                    # UnicodeErrors are fine to ignore: the value is
                    # converted to unicode right below anyway
                    pass

        # Whatever we ended up with, hand it back as unicode
        return to_unicode(message, encoding=self.input_charset)
Beispiel #46
0
    def write_string_tag(self, attribute, value, tag, last):
        """ Write a [predicate Value@tag] line

        Nothing is written when ``value`` is None or is empty after
        stripping.  The line ends with ``.`` when ``last`` is exactly True
        and with ``;`` otherwise.
        """
        # Make sure attribute is okay for URI
        attribute = self.check_for_uri(attribute)
        terminator = u'.' if last is True else u";"

        if value is None:
            return

        value = to_unicode(value.strip())
        if value == "":
            return

        tagged_value = self.triple_value_tag(value, tag)
        line = to_unicode(
            self.inlineFormat.format(attribute, tagged_value, terminator))
        self.turtleWriter.write(line)
Beispiel #47
0
    def lngettext(self, msgid1, msgid2, n):
        """Return the plural-aware translation as an encoded byte string.

        ``msgid1`` is the default when ``n == 1`` and ``msgid2`` otherwise.
        The catalog is tried first, then the fallback (if any).  The result
        is re-encoded to ``self._output_charset`` or, when that is unset,
        the locale's preferred encoding.  A non-string ``msgid1`` gives ``''``.
        """
        tmsg = msgid1 if n == 1 else msgid2

        if not isinstance(msgid1, basestring):
            return ''

        u_msgid1 = to_unicode(msgid1, encoding=self.input_charset)
        try:
            #pylint:disable-msg=E1101
            tmsg = self._catalog[(u_msgid1, self.plural(n))]
        except KeyError:
            fallback = self._fallback
            if fallback:
                try:
                    tmsg = fallback.ngettext(msgid1, msgid2, n)
                except (AttributeError, UnicodeError):
                    # UnicodeErrors are fine to ignore: the message is
                    # re-encoded right below anyway
                    pass

        # Pick the encoding for the string we return
        output_encoding = self._output_charset
        if not output_encoding:
            output_encoding = locale.getpreferredencoding()

        return self._reencode_if_necessary(tmsg, output_encoding)
Beispiel #48
0
    def _reencode_if_necessary(self, message, output_encoding):
        '''Return a byte string that is valid in the given charset.

        ``message`` is returned as is when ``byte_string_valid_encoding``
        accepts it for ``output_encoding``.  Otherwise it is decoded with
        ``self.input_charset`` and encoded again to ``output_encoding``.
        ``''`` is returned when ``to_unicode`` raises :exc:`TypeError`.

        .. warning:: This method may mangle the message if the input encoding
            is not known or the message isn't representable in the chosen
            output encoding.
        '''
        try:
            if byte_string_valid_encoding(message, output_encoding):
                # Already fine for the target charset: nothing to do
                return message
        except TypeError:
            # input was unicode, so it needs to be encoded
            pass

        try:
            # Decode to unicode so we can re-encode to desired encoding
            as_unicode = to_unicode(message,
                                    encoding=self.input_charset,
                                    nonstring='strict')
        except TypeError:
            # Not a string; return an empty byte string
            return ''

        # Make sure that we're returning a str of the desired encoding
        return to_bytes(as_unicode, encoding=output_encoding)
    def test_to_unicode(self):
        '''Test to_unicode with good values, plus one invalid ``nonstring``
        argument that must raise TypeError'''
        decode = converters.to_unicode

        # Unicode input comes back unchanged whatever encoding is named
        tools.eq_(decode(self.u_japanese, encoding='latin1'),
                  self.u_japanese)

        # No encoding argument is given for the utf8 fixtures
        tools.eq_(decode(self.utf8_spanish), self.u_spanish)
        tools.eq_(decode(self.utf8_japanese), self.u_japanese)

        # Other encodings have to be named explicitly
        tools.eq_(decode(self.latin1_spanish, encoding='latin1'),
                  self.u_spanish)
        tools.eq_(decode(self.euc_jp_japanese, encoding='euc_jp'),
                  self.u_japanese)

        tools.assert_raises(TypeError, decode, 5, nonstring='foo')
Beispiel #50
0
def capitalize(letter):
    """
    Convert string to upper case in utf-8 safe way.

    The input is decoded from utf-8 to unicode, upper-cased, and encoded
    back to a utf-8 byte string, which is returned.
    """
    decoded = to_unicode(letter, encoding='utf8')
    # Encode the upper-cased text, not the original input; encoding the
    # original would throw the result of .upper() away.
    return to_bytes(decoded.upper(), encoding='utf8')
Beispiel #51
0
def get_unicode(string, encoding='utf-8', errors='replace'):
    """Force a fail-safe conversion to unicode.

    Falsy values (``None``, empty strings, ...) are returned unchanged.
    Otherwise ``string`` is converted with the given ``encoding``; if that
    raises, the encoding detected by chardet is used for a second attempt.
    """
    # Nothing to convert: hand falsy values straight back
    if not string:
        return string

    try:
        return to_unicode(string, encoding, errors)
    except Exception:
        detected = chardet.detect(string)["encoding"]
        return to_unicode(string, detected, errors)
Beispiel #52
0
def get_unicode(string, encoding='utf-8', errors='replace'):
    """Convert ``string`` to unicode, falling back to a detected encoding.

    A falsy ``string`` is returned as is without any conversion.  When the
    conversion with ``encoding`` raises, it is retried with the encoding
    that chardet reports for ``string``.
    """
    result = string

    # Only attempt the conversion for truthy values
    if string:
        try:
            result = to_unicode(string, encoding, errors)
        except Exception:
            guess = chardet.detect(string)
            result = to_unicode(string, guess["encoding"], errors)

    return result
Beispiel #53
0
def capitalize(letter):
    """
    Convert string to upper case in utf-8 safe way.

    Decodes ``letter`` from utf-8, upper-cases the unicode text and returns
    it encoded back to a utf-8 byte string.
    """
    letter = to_unicode(letter, encoding='utf8')
    newletter = letter.upper()
    # Encode the upper-cased value; passing ``letter`` here would return
    # the input without upper-casing it.
    newletter = to_bytes(newletter, encoding='utf8')
    return newletter
Beispiel #54
0
 def test_guess_encoding_with_chardet_uninstalled(self):
     '''With chardet missing, the guessed encoding must turn the euc_jp
     fixture into the mangled latin1 fixture.'''
     if chardet:
         raise SkipTest('chardet installed, euc_jp will not be mangled')

     sample = self.euc_jp_japanese
     tools.ok_(
         to_unicode(sample, misc.guess_encoding(sample)) ==
         self.u_mangled_euc_jp_as_latin1)
Beispiel #55
0
    def exceptions_search(self, uuid, page, search):
        """Render one page of logged exceptions for ``uuid`` matching ``search``.

        ``search`` is URL-unquoted and ``page`` must parse as an integer of
        at least 1.  Results are shown ten per page, sorted by date in
        descending order.  An invalid or out-of-range page aborts with a
        404, and a UUID/search pair without matches renders the form
        template with an error message.
        """
        uuid = to_unicode(uuid)
        search = urllib.unquote(search)

        try:
            page = int(page)
        except Exception:
            return abort(404, "Page not found")

        if page < 1:
            return abort(404, "Page not found")

        mongo = self.manager.mongo
        bots = mongo.get_collection("bots")
        exceptions = mongo.get_collection("exceptions")

        # Bots seen within the last ten minutes are counted as online.
        cutoff = datetime.datetime.utcnow() - datetime.timedelta(minutes=10)
        online = bots.find({"last_seen": {"$gt": cutoff}}).count()

        # NOTE(review): the search text is wrapped in slashes before it is
        # sent as the $regex value -- confirm the server treats them as
        # delimiters rather than literal characters.
        pattern = "/%s/" % search

        logged_num = exceptions.find({
            "uuid": uuid,
            "traceback": {"$regex": pattern}
        }).count()

        if logged_num < 1:
            message = ("No exceptions have been logged for the UUID '%s' "
                       "with the search string '%s'" % (uuid, search))
            return template(
                "templates/exceptions_form.html",
                online=online,
                error=message
            )

        # Number of pages, rounding up when the last page is only partly full.
        per_page = 10
        pages, overhang = divmod(int(logged_num), per_page)
        if overhang > 0:
            pages += 1

        if page > pages:
            return abort(404, "Page not found")

        first_row = (page * per_page) - per_page
        data = exceptions.find({
            "uuid": uuid,
            "traceback": {"$regex": pattern}
        }, skip=first_row, limit=per_page, sort=[("date", DESCENDING)])

        return template("templates/exceptions.html",
                        online=online, error=None,
                        cur_page=page, max_page=pages,
                        data=data, uuid=uuid, search=search)
Beispiel #56
0
    def _delete_factoid_interaction(self, txn, factoid_key, location, protocol,
                                    channel):
        """
        Delete a factoid, raising MissingFactoidError if no row was removed.

        The channel is only part of the match when ``location`` equals
        ``self.CHANNEL``.  After a successful delete a FactoidDeletedEvent
        is run through the "Factoids/Deleted" callback.
        """

        self.logger.trace("DELETE | Key: %s | Loc: %s | Pro: %s | Cha: %s" %
                          (factoid_key, location, protocol, channel))

        channel_scoped = location == self.CHANNEL

        statement = ("DELETE FROM factoids WHERE factoid_key = ? AND "
                     "location = ? AND protocol = ?")
        params = [to_unicode(factoid_key), to_unicode(location),
                  to_unicode(protocol)]
        if channel_scoped:
            # Channel factoids are additionally keyed on the channel name
            statement += " AND channel = ?"
            params.append(to_unicode(channel))

        txn.execute(statement, tuple(params))

        if txn.rowcount == 0:
            raise MissingFactoidError(
                _("Factoid '%s' does not exist") % factoid_key)

        event = FactoidDeletedEvent(self, factoid_key)
        self.events.run_callback("Factoids/Deleted", event, from_thread=True)
Beispiel #57
0
 def test_guess_encoding_with_chardet_installed(self):
     '''With chardet available, the guessed encoding must decode the euc_jp
     fixture to the expected japanese text.'''
     if not chardet:
         raise SkipTest(
             'chardet not installed, euc_jp will not be guessed correctly')

     guessed = misc.guess_encoding(self.euc_jp_japanese)
     tools.ok_(to_unicode(self.euc_jp_japanese, guessed) == self.u_japanese)