Beispiel #1
0
 def eh_linha_vazia(self, linha):
     """Report whether ``linha`` counts as an empty line.

     The line is treated as empty when the ``^$`` expression matches it as
     given, matches it once every ``' '`` has been removed, or when it
     equals the empty string.
     """
     padrao_vazio = VerEx().start_of_line().end_of_line()
     casa_original = padrao_vazio.match(linha) is not None
     # Strip the spaces and try the same pattern again.
     sem_espacos = VerEx().find(' ').replace(linha, '')
     casa_sem_espacos = padrao_vazio.match(sem_espacos) is not None
     return casa_original or casa_sem_espacos or linha == ''
    def sentiment_words(self, filename):
        """
        Build a word -> sentiment score lookup from a sentiment lexicon file.

        Parameters
        ----------
        filename : str
            Path to a tab-separated lexicon file. The first 26 lines are
            skipped; the following header row must provide the ``PosScore``,
            ``NegScore`` and ``SynsetTerms`` columns.

        Returns
        -------
        dict
            Maps each word to ``PosScore - NegScore``. Rows with a net score
            of zero are dropped, only terms tagged with sense number 1 are
            kept, and a later row overwrites an earlier one for the same word.

        """
        df = pd.read_table(filename, skiprows=26)
        df['score'] = df['PosScore'] - df['NegScore']

        # Neutral entries carry no sentiment, so drop them up front.
        df = df[df['score'] != 0]

        # Sense number following '#'. The whole number is captured so that a
        # multi-digit sense such as '#10' is not mistaken for sense '1'.
        sense_rx = re.compile(r'#([0-9]+)')

        # One ASCII letter; joining the matches strips the '#n' suffix and any
        # other non-letter character from a term. Plain-`re` equivalent of the
        # a-z / A-Z character range this method previously built with VerEx.
        letter_rx = re.compile(r'[a-zA-Z]')

        sent_dict = {}
        for terms, score in zip(df['SynsetTerms'], df['score']):
            senses = sense_rx.findall(terms)

            # Always a list, even for a single term: zipping the sense numbers
            # against a bare string would pair them with single characters.
            words = [''.join(letter_rx.findall(term)) for term in terms.split(' ')]

            for sense, word in zip(senses, words):
                # only sentiment for the most common meaning of the word
                if sense == '1':
                    sent_dict[word] = score

        return sent_dict
# mylist = []
# if mylist:
#     print('zzzz')
# zzz = ['3'][0]
# zzz = str(zzz)
# print(zzz)
# website = '222222222222222222222'
# #
# # print(website.startswith('www'))
# # if website.startswith('www'):
# #     project_performance
# #     print('AAAAAAAAAAAAAAAAA')

from verbalexpressions import VerEx
# NOTE(review): this instance is not used by the statements below.
verbal_expression = VerEx()

# Sentence in which the word "bird" will be swapped for "duck"
replace_me = "Replace bird with a duck"

# Build an expression that looks for the literal word "bird"
expression = VerEx().find('bird')

# Let the VerEx object perform the replacement itself, then show the result
result_VerEx = expression.replace(replace_me, 'duck')
print(result_VerEx)

# Alternatively, compile the expression and call the compiled object's
# sub() directly (note the different argument order: replacement first)
import re
regexp = expression.compile()
result_re = regexp.sub('duck', replace_me)
Beispiel #4
0
 def setUp(self):
     """Create a new ``VerEx`` instance and store it on ``self.v``.

     NOTE(review): presumably the per-test fixture hook of a
     ``unittest.TestCase``; the class header is not visible here.
     """
     self.v = VerEx()
Beispiel #5
0
#!/usr/bin/env python
# coding: utf-8

# In[198]:

from verbalexpressions import VerEx

# In[199]:

# Builder instance used for the expression below.
verEx = VerEx()

# In[200]:

# Sample inputs: letters (one with an inner space) wrapped in digits.
strings = ['123Abdul233', '233Raja434', '223Ethan Hunt444']

# In[201]:

# Character range a-z and A-Z; the trailing unpaired ' ' presumably adds the
# space character to the class -- TODO confirm against the VerEx docs.
expression = verEx.range('a', 'z', 'A', 'Z', ' ')

# In[202]:

# Bare expression statement: its value is discarded when run as a script
# (the "In[...]" markers suggest it was a notebook cell showing the pattern).
expression.source()

# In[204]:

import re

# Compile the expression into a pattern object.
re_exp = expression.compile()

# In[205]:
Beispiel #6
0
import os
import subprocess
import sys
from mdx_gfm import GithubFlavoredMarkdownExtension

# Debug flag taken from the first command-line argument: the argument is
# lower-cased and decoded as JSON, so "True"/"true" becomes True and
# "False"/"false" becomes False. Raises IndexError when no argument is given.
DEBUG_MODE = json.loads(sys.argv[1].lower())


def log(s: str):
    """Print ``s`` to standard output behind a `` >> `` prefix."""
    line = " >> %s" % s
    print(line)


# Base URL inserted in front of rewritten link targets (used by
# to_fix_linkpath below).
url = 'file:///android_asset/'

# Group numbers in the replacement strings below assume every VerEx call in a
# chain contributes exactly one capture group, in call order.

# External links: matches "[text](http...)".
re_disable_link = (VerEx().find("[").anything_but("]").find(
    "](http").anything_but(")").find(")").regex())
# Keep only group 2, the text between "[" and "]", dropping the link itself.
to_disable_link = r'\2'

# In-page anchor links: matches "[text](#anchor)".
re_fix_innerlink = (VerEx().find("[").anything_but("]").find(
    "](#").anything_but(")").find(")").regex())
# %s will be self markdown name
to_fix_innerlink = r'[\2](%s#\4)'

# Remaining links: matches "[text](path)" with an optional "#fragment" part.
re_fix_linkpath = (VerEx().find("[").anything_but("]").find("](").anything_but(
    '#').maybe("#").anything_but(")").find(")").regex())
# Rewrite the target to url + path + ".html", followed by the optional "#"
# and fragment (groups 5 and 6).
to_fix_linkpath = r'[\2](%s\4.html\5\6)' % url

# img-tag to a-tag
re_img2a = (VerEx().find("<img").anything().find(' src="').anything().find(
Beispiel #7
0
# -*- coding:utf-8 -*-
import re
from verbalexpressions import VerEx

# Create an example of how to test for correctly formed URLs

# NOTE(review): this first instance is only referenced by the commented-out
# experiment below and is replaced a few lines further down.
verbal_expression = VerEx()
# word = (verbal_expression.start_of_line('#').anything_but("#").end_of_line("#"))
# print(word.source())

# Fresh builder for the URL expression.
verbal_expression = VerEx()
# Pattern: "http", optional "s", "://", optional "www.", then any run of
# non-space characters up to the end of the line.
tester = (verbal_expression.
          start_of_line().
          find('http').
          maybe('s').
          find('://').
          maybe('www.').
          anything_but(' ').
          end_of_line()
          )

# Create an example URL
# test_url = "https://www.google.com"

# Test if the URL is valid
# if tester.match(test_url):
#     print("Valid URL")

# Print the generated regex
print(tester.source())  # => ^(http)(s)?(\:\/\/)(www\.)?([^\ ]*)$
Beispiel #8
0
import markdown
import os
import subprocess
import sys

# Debug flag taken from the first command-line argument: the argument is
# lower-cased and decoded as JSON, so "True"/"true" becomes True and
# "False"/"false" becomes False. Raises IndexError when no argument is given.
DEBUG_MODE = json.loads(sys.argv[1].lower())


def log(s: str):
    """Write ``s`` to standard output, prefixed with `` >> ``."""
    prefixed = " >> %s" % s
    print(prefixed)


# Base URL inserted in front of rewritten link targets (used by
# to_fix_linkpath below).
url = 'file:///android_asset/'

# Group numbers in the replacement strings below assume every VerEx call in a
# chain contributes exactly one capture group, in call order.

# External links: matches "[text](http...)".
re_disable_link = (VerEx().find("[").anything_but("]").find(
    "](http").anything_but(")").find(")").regex())
# Keep only group 2, the text between "[" and "]", dropping the link itself.
to_disable_link = r'\2'

# In-page anchor links: matches "[text](#anchor)".
re_fix_innerlink = (VerEx().find("[").anything_but("]").find(
    "](#").anything_but(")").find(")").regex())
# %s will be self markdown name
to_fix_innerlink = r'[\2](%s#\4)'

# Remaining links: matches "[text](path)" with an optional "#fragment" part.
re_fix_linkpath = (VerEx().find("[").anything_but("]").find("](").anything_but(
    '#').maybe("#").anything_but(")").find(")").regex())
# Rewrite the target to url + path + ".html", followed by the optional "#"
# and fragment (groups 5 and 6).
to_fix_linkpath = r'[\2](%s\4.html\5\6)' % url

# img-tag to a-tag
re_img2a = (VerEx().find("<img").anything().find(' src="').anything().find(