Ejemplo n.º 1
0
def scrape_data_from_google(file_id, keyword):
    """Scrape the Google results page for ``keyword`` and store the outcome.

    Loads the search page in headless Chrome, parses the page source with
    BeautifulSoup, derives three values through ``count_adword``,
    ``count_link`` and ``get_total_search_result``, and saves them together
    with the prettified HTML as a ``Data`` row via ``db_session``. Afterwards
    it sleeps for the number of seconds given by the ``SCRAPING_DELAY``
    environment variable.

    Args:
        file_id: Value stored in the ``file_id`` field of the new ``Data`` row.
        keyword: Search term; it is URL-encoded before being put in the query.

    Raises:
        KeyError: If ``SCRAPING_DELAY`` is not set.
        ValueError: If ``SCRAPING_DELAY`` is not an integer string.
    """
    from urllib.parse import quote_plus

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument(
        "user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36"
    )

    # Start the browser before entering the ``try``: if start-up fails there
    # is nothing to clean up, and the real error is not hidden by an
    # UnboundLocalError raised from the ``finally`` clause.
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Encode the keyword so spaces, '&', '#', etc. stay inside the query.
        driver.get(
            f"https://www.google.com/search?q={quote_plus(str(keyword))}")

        soup = BeautifulSoup(driver.page_source, "html.parser")

        total_adword = count_adword(soup)
        total_link = count_link(soup)
        total_search_result = get_total_search_result(soup)
    finally:
        # quit() shuts the whole driver session down; close() would only
        # close the current window.
        driver.quit()

    new_data = Data(file_id=file_id,
                    keyword=keyword,
                    total_adword=total_adword,
                    total_link=total_link,
                    total_search_result=total_search_result,
                    html_code=soup.prettify())
    db_session.add(new_data)
    db_session.commit()
    time.sleep(int(os.environ["SCRAPING_DELAY"]))
Ejemplo n.º 2
0
    def test_get_csv_should_return_200_when_correct_authorization_and_have_data_for_user_id_and_status_is_true_when_have_data(self):
        """GET /csv with a valid token returns 200 and one row ending in ``True``.

        A ``Data`` row is stored for ``self.file_id`` first; the response body
        must then be a single row whose last element is ``True``.
        """
        auth_headers = {"Authorization": generate_jwt(self.user_id)}

        stored_row = Data(
            file_id=self.file_id,
            id=1,
            keyword="test-keyword",
            total_adword=1,
            total_link=1,
            total_search_result="about 1,000",
            html_code="test-html-code",
        )
        self.db_session.add(stored_row)
        self.db_session.commit()

        with app.test_client() as client:
            response = client.get('/csv', headers=auth_headers)
            assert response.status_code == 200

            # The fourth column is not pinned by this test, hence ANY.
            payload = json.loads(response.data)
            assert payload == [
                [self.file_id, self.filename, self.keywords, ANY, True],
            ]
Ejemplo n.º 3
0
    def test_get_data_report_should_return_200_when_file_id_exist(self):
        """GET /data-report/1 returns 200 and the expected two data rows.

        Stores a second ``Data`` row (id 2, keyword ``test-keyword-2``) for
        ``self.file_id`` and expects the response to list a row built from the
        ``self.*`` fixture values followed by the row added here.
        """
        data_id = 2
        keyword = "test-keyword-2"
        self.new_data = Data(file_id=self.file_id,
                             id=data_id,
                             keyword=keyword,
                             total_adword=self.total_adword,
                             total_link=self.total_link,
                             total_search_result=self.total_search_result,
                             html_code=self.html_code)
        self.db_session.add(self.new_data)
        self.db_session.commit()
        with app.test_client() as client:
            result = client.get('/data-report/1')
            assert result.status_code == 200

            expected_data = [
                [
                    self.keyword, self.total_adword, self.total_link,
                    self.total_search_result, self.html_code, self.file_id
                ],
                [
                    keyword, self.total_adword, self.total_link,
                    self.total_search_result, self.html_code, self.file_id
                ],
            ]
            # Compare with ``==``; ``assert (a, b)`` tests a non-empty tuple
            # and therefore can never fail.
            assert json.loads(result.data) == expected_data
Ejemplo n.º 4
0
def user_create():
    """Show the user-creation form, or create a user from a submitted form.

    For a POST request the ``user-name`` and ``phone-number`` form fields are
    stored as a new ``Data`` row and the client is redirected to
    ``/users-list``. If storing fails, an HTML error fragment is returned
    instead. Any other method renders ``create_user.html``.
    """
    if request.method != 'POST':
        return render_template('create_user.html')

    name = request.form['user-name']
    phone = request.form['phone-number']

    try:
        data = Data(name=name, phone=phone)
        db.session.add(data)
        db.session.commit()
    except Exception:
        # Roll back so the session is not left in a failed transaction for
        # later use; ``Exception`` (rather than a bare ``except``) lets
        # SystemExit/KeyboardInterrupt propagate.
        db.session.rollback()
        return '<h2>DB create error</h2>'

    return redirect('/users-list')
Ejemplo n.º 5
0
def insert():
    """Create and save a ``Data`` record from the JSON body of a POST request.

    Reads ``nome``, ``endereco``, ``telefone``, ``data`` and ``status`` from
    ``request.json`` and passes them positionally to ``Data``.

    Returns:
        ``{'ok': True}`` as JSON on success, or a JSON error message with
        status 500 when building or saving the record fails. Nothing is
        returned for non-POST requests.
    """
    if request.method == 'POST':

        nome = request.json['nome']
        endereco = request.json['endereco']
        telefone = request.json['telefone']
        data = request.json['data']
        status = request.json['status']

        try:
            user = Data(nome, endereco, telefone, data, status)
            user.save()

            return jsonify({'ok': True})
        except Exception:
            # ``Exception`` instead of a bare ``except`` so that SystemExit
            # and KeyboardInterrupt are not swallowed.
            return jsonify({'mensagem': 'Ocorreu um erro interno'}), 500
Ejemplo n.º 6
0
def init_data():
    """Load the dataset and derive selector options plus summary totals.

    Only the last entry of each non-empty country series is used for the
    totals.

    Returns:
        A 4-tuple ``(data, countries, types, tots)``:

        * ``data`` - the ``data`` attribute of a fresh ``Data()`` object.
        * ``countries`` - ``{'label', 'value'}`` dicts, one per country with
          at least one entry.
        * ``types`` - ``{'label', 'value'}`` dicts for "Confirmed" and
          "Deaths".
        * ``tots`` - summed ``confirmed`` and ``deaths`` of each country's
          last entry, and ``last_date``, the most recent date among those
          entries (``None`` when there are none).
    """
    data = Data().data

    countries = []
    types = [
        {'label': "Confirmed", 'value': "confirmed"},
        {'label': "Deaths", 'value': "deaths"},
    ]

    tots = {
        'last_date': None,
        'confirmed': 0,
        'deaths': 0,
    }

    for country in data:
        entries = data[country]
        if len(entries) == 0:
            continue

        latest = entries[-1]
        countries.append({'label': country, 'value': country})
        tots['confirmed'] += latest.get('confirmed', 0)
        tots['deaths'] += latest.get('deaths', 0)

        # Keep the most recent date seen so far, so the result does not
        # depend on which country happens to be iterated last.
        entry_date = datetime.strptime(latest.get('date'), '%Y-%m-%d')
        if tots['last_date'] is None or entry_date > tots['last_date']:
            tots['last_date'] = entry_date
    return data, countries, types, tots
Ejemplo n.º 7
0
    def setUp(self):
        """Store one user, one file and one data row, committing after each."""
        super().setUp()

        def save(record):
            # Add a single record and commit it straight away.
            self.db_session.add(record)
            self.db_session.commit()

        # Fixture values kept on the instance.
        self.user_id, self.email, self.password = 1, "[email protected]", "******"
        self.file_id, self.filename, self.keywords = 1, "test-file.csv", 1
        self.data_id, self.keyword = 1, "test-keyword"
        self.total_adword = 1
        self.total_link = 1
        self.total_search_result = "about 1,000"
        self.html_code = "test-html-code"

        self.new_user = User(
            id=self.user_id,
            email=self.email,
            password=self.password,
        )
        save(self.new_user)

        self.new_file = File(
            user_id=self.user_id,
            id=self.file_id,
            filename=self.filename,
            keywords=self.keywords,
        )
        save(self.new_file)

        self.new_data = Data(
            file_id=self.file_id,
            id=self.data_id,
            keyword=self.keyword,
            total_adword=self.total_adword,
            total_link=self.total_link,
            total_search_result=self.total_search_result,
            html_code=self.html_code,
        )
        save(self.new_data)
Ejemplo n.º 8
0
 def run(self):
     """Add 100 ``Data`` rows with a random ``field`` label, committing each one."""
     session = db.session
     for _ in range(100):
         record = Data()
         # Label is "Data <n>" with n drawn from 0..100 inclusive.
         record.field = f'Data {random.randint(0, 100)}'
         session.add(record)
         session.commit()
Ejemplo n.º 9
0
async def test_bot():
    """Log the bot in with the configured token, then close it.

    The config and data locations come from the ``CONFIG_LOCT`` and
    ``DATA_LOCT`` environment variables, defaulting to ``config.yml`` and
    ``data.yml``.
    """
    config = Data(getenv('CONFIG_LOCT', default='config.yml'))
    data = Data(getenv('DATA_LOCT', default='data.yml'))
    bot = WatsonBot(config=config, data=data)
    try:
        await bot.login(config['bot']['token'])
    finally:
        # Close the bot even when login fails, so it is always shut down.
        await bot.close()
Ejemplo n.º 10
0
 def __init__(self, data, check_version=True):
     """Wrap ``data`` in a ``Data`` object kept on a private attribute.

     Args:
         data: Passed unchanged as the first argument to ``Data``.
         check_version: Passed unchanged as the second argument to ``Data``;
             defaults to ``True``.
     """
     self.__data = Data(data, check_version)
Ejemplo n.º 11
0
# get envvars
# os.getenv returns a string whenever the variable is set, so "0" or "false"
# would be truthy. Treat the usual "off" spellings (and unset/empty) as False;
# any other value still enables debug, as before.
DEBUG = os.getenv('DEBUG', '').strip().lower() not in ('', '0', 'false', 'no', 'off')
# NOTE(review): the fallback credentials below are hard-coded; confirm that
# deployments always override them through the environment.
AUTH_USER = os.getenv('BASIC_AUTH_USERNAME', 'bober')
AUTH_PASS = os.getenv('BASIC_AUTH_PASSWORD', 'pleasechange')

# bootstrap the app
app = Flask(__name__)

# configure app before libraries
app.config['BASIC_AUTH_USERNAME'] = AUTH_USER
app.config['BASIC_AUTH_PASSWORD'] = AUTH_PASS

# attach libraries
Bootstrap(app)
basic_auth = BasicAuth(app)
store = Data()

@app.route('/')
def index():
  """Render ``index.jinja`` with the store's ``releases`` and ``kits``."""
  deps = store.get_data()
  releases, kits = deps['releases'], deps['kits']
  return render_template("index.jinja", releases=releases, kits=kits)

@app.route('/update_git')
@basic_auth.required
def update_git():
  """Call ``store.update_versions()`` and reply with the plain text ``OK``.

  The route is wrapped in ``basic_auth.required``.
  """
  store.update_versions()
  return "OK"

@app.route('/update_version', methods=['POST'])
@basic_auth.required
def update_version():
Ejemplo n.º 12
0
#!/usr/bin/env python3
from models.data import Data
from models.NeuralNetwork import NeuralNetwork as Model
from models.eval import smape
import matplotlib.pyplot as plt
import numpy as np

# Run settings: read offset and the train / test sizes handed to the model.
offset, n_train, n_test = 0, 10000, 500

# Read the data, build the model on it, then score the model with SMAPE.
data = Data('data/database.sqlite')
data.read('May2015', ['body'], offset=offset)
model = Model(data, n_train)
predicted, actual = model.test(data, n_test)
print(f'Smape: {smape(predicted, actual)}')
print(n_train, n_test, sep='\n')

if False:

    # Disabled on purpose: visualizations that are possible with models
    # that use the vectorizer.
    words = np.array(["", "the", "reddit", "a", "dog"])
    features = model.vectorizer.transform(words)
    predictions = model.model.predict(features)
    print(model.model.intercept_)
    print(predictions)
    print(words)

    plt.plot(predicted, actual, 'o')
    plt.ylim()
Ejemplo n.º 13
0
def main():
  """Build the bot from its config and data locations and run it.

  The locations come from the ``CONFIG_LOCT`` and ``DATA_LOCT`` environment
  variables, defaulting to ``config.yml`` and ``data.yml``.
  """
  config_path = getenv('CONFIG_LOCT', default='config.yml')
  config = Data(config_path)
  data_path = getenv('DATA_LOCT', default='data.yml')
  data = Data(data_path)

  bot = WatsonBot(config=config, data=data)
  token = config['bot']['token']
  bot.run(token)