Exemple #1
0
    def parse(self, response):
        """Scrape the questions listed on ``response.url`` and persist them.

        Loads the page in the shared Selenium driver, scrolls once to trigger
        lazy loading, then extracts questions and dates (plus categories when
        no fixed ``CATEGORY`` is configured) and stores each question.
        """
        self.driver.get(response.url)
        self.scroll_down_n_times(1)

        questions = self.get_questions()
        time.sleep(self.SLEEP_TIME)

        dates = self.get_question_dates()
        time.sleep(self.SLEEP_TIME)

        categories = None
        if not self.CATEGORY:
            # No fixed category configured: read one per question off the page.
            categories = self.get_categories()
            time.sleep(self.SLEEP_TIME)
            assert len(questions) == len(categories)

        for idx, content in enumerate(questions):
            timestamp = DbManager.convert_date_string_to_datetime(dates[idx])
            label = self.CATEGORY if self.CATEGORY else categories[idx]
            DbManager.add_question({
                'content': content,
                'upvotes': -1,
                'category': label,
                'timestamp': timestamp,
            })

        self.driver.close()
 def build_index_from_pickles(self):
     """Merge all pickled partial indexes into one structure and persist it."""
     db = DbManager()
     pickles = PickleManager()
     print("merging pickles: %s" % (datetime.datetime.now()))
     merged_index = pickles.merge_index_structures()
     print("saving full index: %s" % (datetime.datetime.now()))
     db.save_full_index(merged_index)
     print("full index saved: %s" % (datetime.datetime.now()))
Exemple #3
0
 def get_movies(self, term):
     """Return autocomplete entries (id/label/value) for movies matching *term*."""
     rows = DbManager().get_movies(term)
     entries = []
     # iterrows() yields (index, row); only the row is needed.
     for _, row in rows.iterrows():
         entries.append({
             'id': row['id'],
             'label': row['title'],
             'value': row['title'],
         })
     return entries
Exemple #4
0
def handle_whois_queue():
    """
    Read queued visitors from the database and resolve whois info for each.

    A lock flag ensures only one worker processes the queue at a time.

    :return: False if the queue was already locked (nothing done), True otherwise
    """
    # Another worker is already processing the queue.
    if is_whois_locked():
        return False
    lock_whois(True)
    try:
        visitor_lst = read_queue()
        for visitor in visitor_lst:
            # Fresh tracker and DB connection for every visitor.
            ws = WhoisTracker()
            dbman = DbManager()
            dbman.connect()
            try:
                handle_whois(ws,
                             dbman,
                             visitor_ip=visitor['visitor_ip'],
                             referer_url=visitor['referer_url'],
                             page_url=visitor['page_url'],
                             datetime_of_visit=visitor['datetime_of_visit']
                )
                del_visitor_from_queue(visitor['id'], dbman.conn)
                dbman.commit_changes(True)
            except:
                # Roll back this visitor's changes and propagate the error;
                # the outer finally still releases the queue lock.
                dbman.commit_changes(False)
                raise
            finally:
                dbman.close(commit=None)
    finally:
        # Always release the queue lock, even on failure.
        lock_whois(False)

    return True
Exemple #5
0
def main_task(max_days, stations_list, mail_list):
    """Scrape station-based property searches, split the results into
    possible Help-to-Buy properties vs. the rest, and e-mail a summary.

    :param max_days: maximum listing age used to build the search URLs
    :param stations_list: stations to build the searches around
    :param mail_list: recipients for the results e-mail
    """
    properties = []
    property_urls = []
    db_manager = DbManager()
    domain, initial_search_urls = get_station_based_initial_search_urls(
        max_days, stations_list)
    for initial_search_url in initial_search_urls:
        try:
            results_pages = get_results_pages(domain, initial_search_url)
            for results_page in results_pages:
                domain, individual_house_links = get_individual_house_links(
                    domain, results_page)
                for house_link in individual_house_links:
                    property_data = scrape_individual_house(domain, house_link)
                    db_manager.add_new_property(property_data)
                    # De-duplicate by URL.
                    if property_data['url'] not in property_urls:
                        properties.append(property_data)
                        property_urls.append(property_data['url'])
        except Exception as e:
            # Best effort: report the failure and continue with the next search.
            send_email(
                'error@error_email.com',
                f'An error occurred whilst scraping {initial_search_url}: {str(e)}'
            )

    # BUG FIX: the original called properties.remove(item) while iterating
    # over `properties`, which silently skipped the element following every
    # match. Partition into two fresh lists instead.
    help_to_buy_filters = ['new', 'luxury', 'help']
    help_to_buy_properties = []
    remaining_properties = []
    for item in properties:
        if any(word in item['full_description_text'] for word in help_to_buy_filters) \
            or any(word in item['key_features_text'] for word in help_to_buy_filters):
            help_to_buy_properties.append(item)
        else:
            remaining_properties.append(item)
    properties = remaining_properties

    data = [{
        'type': 'Possibly Help to Buy',
        'properties': help_to_buy_properties,
        'number_of_properties': len(help_to_buy_properties),
        'average_price':
        np.average([item['price'] for item in help_to_buy_properties]),
    }, {
        'type': 'Other Properties',
        'properties': properties,
        'number_of_properties': len(properties),
        'average_price':
        np.average([item['price'] for item in properties]),
    }]

    send_results(data, mail_list)
Exemple #6
0
def test_run_sql():
    """run_sql must raise DbManagerError for invalid SQL and bad inserts.

    BUG FIX: the original grouped all three calls under a single
    ``pytest.raises`` block, so execution stopped at the first raising
    statement and the other two calls never ran. Each call now gets its
    own context manager so each is actually exercised.
    """
    db = DbManager()
    with pytest.raises(DbManagerError):
        db.run_sql("blablala1")
    with pytest.raises(DbManagerError):
        db.run_sql("INSERT INTO data VALUES (?,?,?,?)", [(1, 2, 3, 4)])
    with pytest.raises(DbManagerError):
        db.run_sql("INSERT INTO cities VALUES (?,?,?,?,?)",
                   [(3, "4", "5", "6", "&")])
    def addCarhabLayer(self, carhabLayer):
        """Register *carhabLayer*: upgrade its database schema if outdated,
        load its point/polyline/polygon tables into a new legend group and
        create its row widget in the carhab layer list."""
        db = DbManager(carhabLayer.dbPath)
        if not db.version() == last_db_version():
            # Collect available update scripts named 'upd_<version>.sql'
            # from the plugin's update directory.
            versions = [os.path.basename(os.path.join(pluginDirectory,'update', f)).split('upd_')[1].split('.sql')[0] for f in listdir(os.path.join(pluginDirectory,'update')) if os.path.basename(os.path.join(pluginDirectory,'update', f)).startswith('upd_')]
            # Keep only scripts newer than the db's current version.
            # NOTE(review): this compares version strings lexicographically —
            # confirm the version numbering scheme keeps that ordering valid.
            versions = [v for v in versions if v > db.version()]
            versions.sort()
            for v in versions:
                res = db.executeScript(os.path.join(pluginDirectory,'update', 'upd_'+v+'.sql'))
                if res:
                    # A truthy result from executeScript signals failure here.
                    error_update_db()
                else:
                    db.set_version(last_db_version())
                    db.commit()
        self.layerMap[carhabLayer.id] = carhabLayer

        # Group the layer's three geometry tables under the legend tree.
        root = QgsProject.instance().layerTreeRoot()
        group = root.addGroup('carhab_'+carhabLayer.getName())

        for tableToLoad in ('point', 'polyline', 'polygon'):
            group.addLayer(self.loadLayerTable(carhabLayer, tableToLoad))

        # Create row corresponding to carhab layer into carhab layer list.
        uiPath = os.path.join(pluginDirectory, 'carhab_layer_item.ui')
        carhabLayerWdgt = loadUi(uiPath)

        jobNameLbl = carhabLayerWdgt.findChild(QLabel,'job_name_label')
        jobNameLbl.setText(carhabLayer.getName())
 def save_index_and_max_freq(self, index_structures, file_name):
     """Persist per-document max term frequencies and build the inverted index.

     For each structure, stores its max frequency in the DB, then appends each
     term's frequency to a comma-separated string keyed by term, and finally
     pickles the combined index as ``data_<file_name>``.

     Fixes: removed the unused local ``max_freqs`` from the original.

     :param index_structures: iterable of per-document index structures with
         ``doc_id``, ``get_max_freq()`` and parallel ``Terms``/``Frequencies``
     :param file_name: suffix for the stored index file
     """
     dbManager = DbManager()
     index = {}
     for index_structure in index_structures:
         dbManager.insert_max_freq_doc(index_structure.doc_id,
                                       index_structure.get_max_freq())
         # Terms and Frequencies are parallel lists.
         for term, frequency in zip(index_structure.Terms,
                                    index_structure.Frequencies):
             if term in index:
                 index[term] = index[term] + "," + str(frequency)
             else:
                 index[term] = str(frequency)
     self.storeData(index, "data_" + file_name)
Exemple #9
0
    def rank_cosine_sim(self, doc_ids, q_terms):
        """Rank *doc_ids* by cosine similarity between each document's and the
        query's TF-IDF vectors (restricted to the query terms).

        Fixes relative to the original:
        - a document matching no query terms produced ``denom == 0`` and
          raised ZeroDivisionError; such documents now score 0.0;
        - an empty query raised IndexError when taking the max frequency;
        - removed the unused ``StructureBuilder`` instance;
        - ``== None`` comparisons replaced with ``is None``.

        :param doc_ids: candidate document ids
        :param q_terms: stemmed query terms (may contain duplicates)
        :return: list of (doc_id, score) sorted by descending score
        """
        dbManager = DbManager()
        docs_relevant_scores = {}

        # Query-term frequencies.
        q_freqs = {}
        for q_term in q_terms:
            q_freqs[q_term] = q_freqs.get(q_term, 0) + 1

        # Frequency of the most frequent query term (0 for an empty query).
        max_q_freq = max(q_freqs.values(), default=0)

        for doc_id in doc_ids:
            tf_idf_sum = 0
            denom_di_sum = 0
            denom_qi_sum = 0
            for q_term in q_terms:
                q_doc_freq = self.get_q_doc_freq(q_term, doc_id)
                if q_doc_freq is None:
                    continue  # term not present in this document's index entry
                max_freq_doc = dbManager.get_max_freq_doc(doc_id)
                if max_freq_doc is not None:
                    self.max_freq_docs[doc_id] = max_freq_doc
                    # Number of documents in which q_term appears at least once.
                    n_docs_q_term = len(self.q_terms_freqs[q_term])

                    tf_idf_doc = self.calc_tf_idf(q_doc_freq, max_freq_doc,
                                                  self.docs_count,
                                                  n_docs_q_term)
                    tf_idf_q = self.calc_tf_idf(q_freqs[q_term], max_q_freq,
                                                self.docs_count, n_docs_q_term)
                    tf_idf_sum += tf_idf_doc * tf_idf_q
                    denom_di_sum += tf_idf_doc**2
                    denom_qi_sum += tf_idf_q**2
            denom = math.sqrt(denom_di_sum) * math.sqrt(denom_qi_sum)
            # BUG FIX: guard the division for documents with no matching terms.
            score = tf_idf_sum / denom if denom else 0.0
            docs_relevant_scores[doc_id] = round(score, 3)
        return sorted(docs_relevant_scores.items(),
                      key=operator.itemgetter(1),
                      reverse=True)
Exemple #10
0
    def setupUi(self, MainWindow):
        """Build the main window UI: central widget, database handle, style,
        menubar, header text, buttons and I/O layout.

        NOTE(review): the set* calls appear order-dependent (later ones
        presumably use widgets created by earlier ones) — confirm before
        reordering.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1440, 900)
        MainWindow.setWindowTitle("TextBlock Application")
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")

        # Database handle shared by the rest of the UI.
        self.db = DbManager('text_block')

        self.setStyle(MainWindow)
        self.setMenubar(MainWindow)
        self.setHeaderText()
        self.setButtons()
        self.setIOLayout()

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
def test_get_stations():
    """get_stations returns all stations as a list, one station as a 7-tuple."""
    dm = DataManager(DbManager())

    all_stations = dm.get_stations()
    assert type(all_stations) == list
    assert len(all_stations) > 0

    single_station = dm.get_stations(531)
    assert type(single_station) == tuple
    assert len(single_station) == 7
 def run(self):
     """Worker entry point: prepare the data and report it via Qt signals."""
     try:
         payload = DataManager(DbManager()).prepare_necessary_data()
         self.signals.result.emit(payload)
     except DataManagerError as err:
         # Surface the failure to the GUI and log the traceback.
         self.signals.error.emit(err)
         get_logger("main_program").exception(err)
     finally:
         # Always signal completion, success or not.
         self.signals.finished.emit()
Exemple #13
0
def fetch_resource(resource_type):
    """Return cached resource data, scraping and saving it first if needed.

    With ``refetch`` set, the cached entry is dropped so it is scraped anew.
    """
    args = Args(resource_type)
    manager = DbManager(args)
    scraper = get_scraper(args)
    key = args.db_key

    if args.refetch:
        manager.delete_resource(key)

    if not manager.resource_exists(key):
        resource_data = scraper.get_resource(args.query_params)
        # Shut down the browser as soon as scraping is done.
        if scraper.driver:
            scraper.driver.quit()
        manager.save_resource(key, resource_data)

    return manager.fetch_resource(key)
Exemple #14
0
    def run(self ):
        # Worker entry point (Python 2): import every polygon feature of
        # self.layer into the current Carhab layer's database, emitting
        # progress percentages and a finished(ok, error_code) signal.
        # Error codes: 0 = exception, 1 = no current carhab layer, 2 = aborted.
        print 'run worker'
        try:
            print 'begin try'
            if CarhabLayerRegistry.instance().getCurrentCarhabLayer():

                # Connect to db.
                self.db = DbManager(CarhabLayerRegistry.instance().getCurrentCarhabLayer().dbPath)

                # To let import geos invalid geometries.
                self.layer.setValid(True)
                layerFeatCount = self.layer.featureCount()

                # Initialisation of useful variables to calculate progression.
                lastDecimal = 0
                i = 0

                for feature in self.layer.getFeatures():
                    if not self.stop:
                        featGeom = feature.geometry()
                        if featGeom.type() == 2: # Polygons case (geometry type 2)
                            if featGeom.isMultipart(): # Split multipolygons
                                for part in featGeom.asGeometryCollection():
                                    if not part.isGeosValid(): # May be a problem...
                                        print 'part not valid'
                                    self.insertPolygon(part)
                            else:
                                self.insertPolygon(feature.geometry())

                            # Calculate and emit new progression value (each percent only).
                            newDecimal = int(100*i/layerFeatCount)
                            if lastDecimal != newDecimal:
                                self.progress.emit(newDecimal)
                                lastDecimal = newDecimal
                            i = i + 1

                    else: # Thread has been aborted
                        print 'abort'
                        # Cancel inserts already done
                        self.db.conn.rollback()
                        self.finished.emit(False, 2)
                        break
                self.db.commit()
                self.db.close()
                # Success path.
                self.finished.emit(True, 0)
            else: # None current carhab layer (error code 1)
                self.finished.emit(False, 1)

        except:
            # Report any failure (with full traceback) to the GUI thread.
            print 'exception'
            import traceback
            print traceback.format_exc()
            self.error.emit(traceback.format_exc())
            self.finished.emit(False, 0)
Exemple #15
0
def test_get_sensors_by_station_id(stat_id, expected):
    """get_sensors_by_station_id returns *expected* sensors and rejects bad ids.

    BUG FIX: the original placed both failing calls under one
    ``pytest.raises`` block, so the second call never executed; each call
    now gets its own block and is actually exercised.
    """
    db = DbManager()

    result = db.get_sensors_by_station_id(stat_id)
    assert type(result) == list
    assert len(result) == expected

    with pytest.raises(DbManagerError):
        db.get_sensors_by_station_id("")
    with pytest.raises(DbManagerError):
        db.get_sensors_by_station_id(None)
Exemple #16
0
    def rank(self, doc_ids, q_terms):
        """Rank *doc_ids* by the summed TF-IDF of the query terms they contain.

        TF uses each document's own term frequencies normalized by its most
        frequent term; IDF uses the total document count and the number of
        documents containing the term.

        Fixes relative to the original: removed the unused
        ``StructureBuilder`` instance and replaced ``== None`` / ``!= None``
        with identity checks.

        :param doc_ids: candidate document ids
        :param q_terms: stemmed query terms
        :return: list of (doc_id, score) sorted by descending score
        """
        dbManager = DbManager()
        docs_relevant_scores = {}

        for doc_id in doc_ids:
            tf_idf_sum = 0
            for q_term in q_terms:
                q_doc_freq = self.get_q_doc_freq(q_term, doc_id)
                if q_doc_freq is None:
                    continue  # term not present in this document's index entry
                max_freq_doc = dbManager.get_max_freq_doc(doc_id)
                if max_freq_doc is not None:
                    self.max_freq_docs[doc_id] = max_freq_doc
                    # Number of documents in which q_term appears at least once.
                    n_docs_q_term = len(self.q_terms_freqs[q_term])

                    tf_idf_sum += self.calc_tf_idf(q_doc_freq, max_freq_doc,
                                                   self.docs_count,
                                                   n_docs_q_term)
            docs_relevant_scores[doc_id] = round(tf_idf_sum, 3)
        return sorted(docs_relevant_scores.items(),
                      key=operator.itemgetter(1),
                      reverse=True)
Exemple #17
0
    def insertPolygon(self, geometry):
        # Insert one polygon into the 'polygon' table of the current Carhab
        # layer database, expressed as 2D WKT wrapped in a spatialite
        # GeomFromText(..., 2154) expression (EPSG:2154).

        # Convert geometry
        wktGeom = geometry.exportToWkt()
        # To force 2D geometry: drop the 'Z' marker and the z-coordinates
        # from the WKT string.
        # NOTE(review): the replaces assume every z value is exactly 0 —
        # confirm against the source layers.
        if len(wktGeom.split('Z')) > 1:
            wktGeom = wktGeom.split('Z')[0] + wktGeom.split('Z')[1]
            wktGeom = wktGeom.replace(" 0,", ",")
            wktGeom = wktGeom.replace(" 0)", ")")
        # Build the SQL geometry expression (SRID 2154).
        geom = "GeomFromText('"
        geom += wktGeom
        geom += "', 2154)"

        geomObj = {}
        geomObj['the_geom'] = geom

        db = DbManager(CarhabLayerRegistry.instance().getCurrentCarhabLayer().dbPath)

        r = Recorder(db, 'polygon')
        r.input(geomObj)

        db.commit()
        db.close()
Exemple #18
0
    def __init__(self):
        """Build the main frame: icon, packet capture, database, status bar,
        menu bar and panels."""
        wx.Frame.__init__(self, None, wx.ID_ANY, u'艦ZUME', size=(450, 500))
        self.isCaputre = False

        # Window icon.
        icon = wx.Icon('./data/img/kanzume.ico', wx.BITMAP_TYPE_ICO)
        self.SetIcon(icon)

        # Prepare the packet capture and hook up its UI callbacks.
        self.pcap = PacketCapture()
        self.pcap.set_text(self.set_text)
        self.pcap.set_progress_value(self.set_progress_value)
        self.army_mng = None

        # Prepare the database.
        self.dbm = DbManager()

        # Initialize the status bar.
        self.CreateStatusBar()
        self.SetStatusText(u'Let\'s capture!')
        self.GetStatusBar().SetBackgroundColour(None)

        # Initialize the menu bar with the stored network interface selection.
        selected_nwif_index = self.dbm.get_selected_nwif_index()
        self.pcap.nif_mng.set_selected_devId(selected_nwif_index)
        menuBar = CaptureMenuBar(self.pcap.nif_mng.get_networkif_listf())
        menuBar.set_selected_nwif(selected_nwif_index)
        self.SetMenuBar(menuBar)
        self.Bind(wx.EVT_MENU, self.select_networkif)

        # Panels.
        panel_base = wx.Panel(self, wx.ID_ANY)
        self.panel_capture = CapturePanel(panel_base)
        self.panel_result = ResultPanel(panel_base, self.dbm)

        # Register the capture button click listener.
        self.panel_capture.set_on_click_listener(self.cap_btn_click_listener)

        layout = wx.BoxSizer(wx.VERTICAL)
        layout.Add(self.panel_capture, flag=wx.EXPAND | wx.ALL, border=10)
        layout.Add(self.panel_result, proportion=1, flag=wx.EXPAND)
        panel_base.SetSizer(layout)
        layout.Fit(panel_base)
def test_get_from_api():
    """call_api rejects bad URLs and returns parsed data for valid endpoints.

    BUG FIX: the original grouped three calls under one ``pytest.raises``
    block, so only the first ever executed; each call now asserts
    independently. The unused ``as e_info`` bindings were dropped.
    """
    data_manager = DataManager(DbManager())

    with pytest.raises(ApiError):
        data_manager.call_api("")
    with pytest.raises(ApiError):
        data_manager.call_api("asdef")
    with pytest.raises(ApiError):
        data_manager.call_api("http://google.com")

    # The bare data endpoint (no sensor id) must also fail.
    with pytest.raises(ApiError):
        data_manager.call_api(DataManager.URL_DATA)

    result = data_manager.call_api(data_manager.URL_STATIONS)
    assert type(result) == list
    assert len(result) > 0

    result = data_manager.call_api(f"{DataManager.URL_DATA}/92")
    assert type(result) == dict
    assert result["key"] == "PM10"
    assert type(result["values"]) == list
    assert len(result["values"]) > 0
    assert "date" in result["values"][0]
Exemple #20
0
    def get_qprecs(self, algo, movies, nr_recs, items, user_id):
        """Build query-based recommendations: rate *movies* with 5 stars on
        behalf of *user_id*, fetch recommendations from the rec server, and
        enrich each rec with its title and image URL.

        Fixes relative to the original: ``!= None`` replaced with
        ``is not None``.

        :param algo: recommender algorithm identifier (passed through)
        :param movies: movie ids to rate on the user's behalf
        :param nr_recs: number of recommendations to request
        :param items: item pool passed through to the rec server
        :param user_id: user whose ratings are replaced
        :return: the rec server's response, enriched in place
        """
        dbManager = DbManager()
        rating_value = 5

        # Start from a clean slate: drop all existing ratings for this user.
        dbManager.remove_ratings(user_id)

        # Insert a maximal rating for each query movie.
        for m in movies:
            dbManager.insert_rating(user_id, m, rating_value)

        all_recs = self.get_recs_from_recserver([user_id], nr_recs, algo,
                                                items)
        print(all_recs)  # debug output kept from the original
        for user_recs in all_recs:
            if user_recs['recs'] is not None:
                for rec in user_recs['recs']:
                    rec['title'] = dbManager.get_movie(
                        rec['item']).loc[0, 'title']
                    rec['image_url'] = self.get_image_url(rec['item'])

        return all_recs
Exemple #21
0
        grid_layout.addWidget(signup_btn, 4, 1)
        signup_btn.clicked.connect(self.on_signup_click)
        self.password_field.setEchoMode(QLineEdit.PasswordEchoOnEdit)
        self.show()

    def on_signup_click(self):
        """Create a new user from the form fields; on failure show an error
        dialog, on success confirm and switch back to the login window."""
        try:
            self.db.signup(self.login_field.text(), self.purse_field.text(),
                           self.password_field.text(), self.email_field.text())
        except Exception:
            # BUG FIX: the original bare `except:` also swallowed SystemExit
            # and KeyboardInterrupt; catch Exception instead.
            self.error_dialog = QtWidgets.QErrorMessage()
            self.error_dialog.showMessage('Such user already is exist!')
            return
        # TODO: figure out how to navigate back to the auth screen
        QMessageBox.information(
            self, "User created.",
            str(self.login_field.text() + " has been added! Please, Log In!"),
            QMessageBox.Ok)
        lg = self.wm.get_window("login")
        lg.setVisible(True)
        self.setVisible(False)


if __name__ == '__main__':
    # Application entry point: create the Qt app, register the shared
    # DbManager with the window manager, show the signup window and run
    # the event loop until exit.
    app = QApplication(sys.argv)
    wm = WindowsManager()
    wm.add("db", DbManager())
    ex = Signup(wm)  # kept in a variable so the window is not garbage-collected
    sys.exit(app.exec_())
 def get_document(self, doc_id):
     """Fetch the document identified by *doc_id* from the database."""
     return DbManager().get_document(doc_id)
Exemple #23
0
    def get_candidate_documents_ids(self, q_terms):
        """Return up to 50 candidate document ids for the stemmed *q_terms*.

        Looks each term up in the index (entries look like ``1:5,2:10,4:22``),
        accumulates per-document total frequency and per-document count of
        matched query terms, then returns the most frequent documents,
        preferring those containing every query term.

        Fixes relative to the original:
        - frequencies were kept as strings, so ``+=`` concatenated digit
          strings (e.g. "5" + "3" -> "53") and the frequency sort compared
          strings; they are now summed as ints;
        - ``docs_count_all_terms`` appended the whole ``q_terms`` list instead
          of the matching doc id (only its length is used, so behavior of the
          threshold check is unchanged);
        - removed the unused local ``q_terms_min_size``;
        - ``!= None`` replaced with ``is not None``.
        """
        MAX_DOCUMENTS_TO_RETRIEVE = 50
        candidate_documents = []
        dbManager = DbManager()
        docs_total_freqs = {}       # doc id -> summed frequency over all query terms
        q_terms_count_per_doc = {}  # doc id -> number of query-term hits
        docs_count_all_terms = []   # docs seen to contain every query term
        count = 1
        for q_t in q_terms:
            doc_freqs = dbManager.get_index_term(q_t)
            if doc_freqs is not None:
                docs_with_frequency = doc_freqs.split(',')
                self.q_terms_freqs[q_t] = docs_with_frequency
                for doc_with_freq in docs_with_frequency:
                    if len(doc_with_freq) > 1:
                        # Entry format: "<docId>:<frequency>"
                        doc_freq = doc_with_freq.split(':')
                        docId = doc_freq[0]
                        docFreq = int(doc_freq[1])  # BUG FIX: was a str
                        if docId in docs_total_freqs:
                            docs_total_freqs[docId] += docFreq
                            q_terms_count_per_doc[docId] += 1
                            if count == len(q_terms) and q_terms_count_per_doc[
                                    docId] == len(q_terms):
                                docs_count_all_terms.append(docId)
                        else:
                            docs_total_freqs[docId] = docFreq
                            q_terms_count_per_doc[docId] = 1
            count += 1

        # Most frequent documents first.
        sorted_docs_total_freqs = sorted(docs_total_freqs.items(),
                                         key=operator.itemgetter(1),
                                         reverse=True)
        for k, v in sorted_docs_total_freqs:
            if q_terms_count_per_doc[k] == len(q_terms):
                # Document contains every query term.
                candidate_documents.append(k)
            elif len(docs_count_all_terms) < MAX_DOCUMENTS_TO_RETRIEVE:
                # Not enough full matches yet: accept partial matches too.
                candidate_documents.append(k)
            if len(candidate_documents) >= MAX_DOCUMENTS_TO_RETRIEVE:
                break

        return candidate_documents[:MAX_DOCUMENTS_TO_RETRIEVE]
Exemple #24
0
def test_get_data_by_sensors_ids():
    """Data for two known sensors comes back as a list of at least 96 rows."""
    rows = DbManager().get_data_by_sensors_ids([3575, 3576])
    assert len(rows) >= 96
    assert type(rows) == list
Exemple #25
0
def last_db_version():
    """Return the schema version stored in the bundled empty.sqlite template."""
    template_path = path.join(pluginDirectory, 'empty.sqlite')
    return DbManager(template_path).version()
Exemple #26
0
# Everything outside prefecture codes 12/13/14.
# NOTE(review): per the cnt_* aliases below these presumably map to
# Chiba/Tokyo/Kanagawa — confirm against the cond_* definitions earlier
# in the file.
cond_others = "pref_cd not in (12, 13, 14)"

# Count members per prefecture bucket in a single scan of `users`.
# cond_tokyo / cond_chiba / cond_kanagawa are defined earlier in the file.
sql = """ select 
 sum( case when {cond_tokyo}    then 1 else 0 end ) as cnt_tokyo
,sum( case when {cond_chiba}    then 1 else 0 end ) as cnt_chiba
,sum( case when {cond_kanagawa} then 1 else 0 end ) as cnt_kanagawa
,sum( case when {cond_others}   then 1 else 0 end ) as cnt_others
from users 
""".format(cond_tokyo=cond_tokyo,
           cond_kanagawa=cond_kanagawa,
           cond_chiba=cond_chiba,
           cond_others=cond_others)

print(sql)

# Run the aggregate query; fetch_one returns a single row.
dbm = DbManager()
result = dbm.fetch_one(sql)
print(result)
(cnt_tokyo, cnt_chiba, cnt_kanagawa, cnt_others) = result

# Monthly membership report text (Japanese): Tokyo / Chiba / Kanagawa /
# others. `year` and `month` are expected to be defined earlier in the file.
text = """現在の会員数レポート {year}年{month}月
東京: {cnt_tokyo}
千葉:{cnt_chiba}
神奈川:{cnt_kanagawa}
その他:{cnt_others}
""".format(year=year,
           month=month,
           cnt_tokyo=cnt_tokyo,
           cnt_chiba=cnt_chiba,
           cnt_kanagawa=cnt_kanagawa,
           cnt_others=cnt_others)
Exemple #27
0
def main():
    """Entry point: synchronize RegObs data and related API data into the DB.

    Flow:
      1. Read configuration and the --force-update command-line flag.
      2. Connect to the database; optionally load excel data.
      3. Fetch RegObs data from the API (or from csv when fetching is off).
      4. Incremental mode: diff against the DB, delete removed records and
         append new ones. Force mode: re-initialize tables and reload all.
      5. Fetch per-incident API data and insert it into the database.
    """
    # See configuration.ini for details
    fetch_regobs, load_excel_data, api_fetch_list, api_delete_list, api_initialize_list = load_configuration()

    # Handle command line arguments
    force_update = parse_command_line_arguments()

    logging.info(
        'Application started with force_update={}'.format(force_update))

    # Create engine and db_inserter
    try:
        engine = create_db_connection()
    except Exception as e:
        logging.exception('Cannot connect to the database')
        raise e

    db_manager = DbManager(engine)

    if load_excel_data:
        logging.info('Adding excel data to database table excel_data..')
        excel_data = process_excel_data()
        ExcelData.metadata.create_all(engine)
        db_manager.insert_dataframe('excel_data', excel_data, if_exists='replace')

    logging.info('Fetching RegObs data..')
    # Fetch regobs data from api
    if fetch_regobs:
        try:
            api_data = Regobs().get_data()
        except Exception as e:
            logging.exception('Cannot fetch RegObs data')
            raise e

        # Keep a csv snapshot so the data can be reloaded without the API.
        to_csv(api_data, 'csv_files/regobs.csv')

    # Load regobs data from csv file (can be useful for debugging or testing incremental update)
    else:
        api_data = read_csv('csv_files/regobs.csv')

    # Incremental update. Only update added, updated or deleted records in database tables.
    if not force_update:
        # Specify that the dataframe should be appended to the existing data in the database tables
        if_table_exists_in_database = 'append'

        # Query current data in database
        logging.info('Querying regobs table from database..')
        try:
            db_data = db_manager.query_all_data_from_table(
                'regobs_data', 'reg_id')
        except exc.NoSuchTableError as e:
            logging.exception(
                'The table regobs_data does not exist in the database. Run the application with --force-update command line parameter to initialize all tables and fetch all data.')
            raise e
        except Exception as e:
            logging.exception('Cannot query RegObs data from database')
            raise e

        # Compare current database data with new api data
        logging.info(
            'Comparing dataframes to determine which rows are added or removed..')
        # Rows to delete from all tables
        deleted_rows = dataframe_difference(
            db_data, api_data, ['reg_id', 'dt_change_time'])

        # Rows to add to all tables
        new_rows = dataframe_difference(
            api_data, db_data, ['reg_id', 'dt_change_time'])

        deleted_reg_ids = list(deleted_rows['reg_id'])

        # Ensure plain ints (not numpy types) before passing to the DB layer.
        deleted_reg_ids = [int(x) for x in deleted_reg_ids]
        logging.info('Records with the following reg_ids will be deleted from the database: {}'.format(
            deleted_reg_ids))

        if deleted_reg_ids:
            # Delete removed rows from api's
            try:
                for data_class in api_delete_list:
                    logging.info(
                        'Deleting removed records for: {}'.format(data_class.__name__))
                    db_manager.delete_rows_with_reg_id(
                        deleted_reg_ids, data_class)
            except Exception as e:
                logging.exception(
                    'Cannot delete removed records from database table')
                raise e
        else:
            logging.info(
                'There are no deleted records to remove from the database')

        if not new_rows.empty:
            logging.info(
                'Number of new records to add: {}'.format(len(new_rows)))

            try:
                avalanche_incident_list = AvalancheIncident.from_dataframe(
                    new_rows
                )
            except Exception as e:
                logging.exception(
                    'Cannot create avalanche_incident_list from regobs data')
                raise e

            # Append new rows to regobs table
            try:
                insert_regobs_data_to_database(new_rows, db_manager, 'append')
            except Exception as e:
                logging.exception(
                    'Cannot append RegObs data to database table')
                raise e

        else:
            # Nothing new: leave the incident list empty so we can exit early.
            avalanche_incident_list = []

    # Initialize database and load all data
    elif force_update:
        # Specify that the dataframe should replace existing data in the database table
        if_table_exists_in_database = 'replace'

        try:
            avalanche_incident_list = AvalancheIncident.from_dataframe(
                api_data
            )
        except Exception as e:
            logging.exception(
                'Cannot create avalanche_incident_list from regobs data')
            raise e

        logging.info('Initializing database tables..')
        try:
            initialize_tables(api_initialize_list, engine)
        except Exception as e:
            logging.exception(
                'Cannot initialize tables in database')
            raise e

        try:
            insert_regobs_data_to_database(api_data, db_manager, 'replace')
        except Exception as e:
            logging.exception(
                'Cannot add RegObs data to database table')
            raise e

    # No incidents to enrich: the run is complete.
    if not avalanche_incident_list:
        logging.info('There is no new records to add to the database')
        logging.info('The application terminated successfully')
        return

    try:
        api_table_dict = get_table_dict_for_apis_in_list(
            api_fetch_list, avalanche_incident_list)
    except Exception as e:
        logging.exception(
            'Error fetching API data')
        raise e

    # Set new database connection
    db_manager.engine = create_db_connection()

    try:
        insert_data_for_table_dict(
            api_table_dict, db_manager, if_table_exists_in_database)
    except Exception as e:
        logging.exception(
            'Cannot add API data to database table')
        raise e

    logging.info('The application terminated successfully')
Exemple #28
0
class Import(QObject):
    """Qt worker object that imports polygon features from a QGIS layer into
    the current Carhab SQLite database.

    Signals:
        finished(bool, int) -- success flag plus an error code:
            0 = normal end (or exception), 1 = no current carhab layer,
            2 = aborted by the user.
        error(str) -- emitted with the formatted traceback on exception.
        progress(float) -- emitted with the completion percentage
            (whole percents only).
    """
    
    finished = pyqtSignal(bool, int)
    error = pyqtSignal(str)
    progress = pyqtSignal(float)
    
    def __init__(self, layer):
        """Keep a reference to the source *layer*; work starts in run()."""
        super(Import, self).__init__()
        
        self.layer = layer
        
        # To let aborting thread from outside
        self.stop = False
        

    def insertPolygon(self, geometry):
        """Insert one polygon *geometry* into the 'polygon' table of the
        current Carhab database, as 2D WKT wrapped in GeomFromText()."""
        
        # Convert geometry
        wktGeom = geometry.exportToWkt()
        # To force 2D geometry: drop the 'Z' marker and the trailing
        # zero z-coordinates from the WKT text.
        if len(wktGeom.split('Z')) > 1:
            wktGeom = wktGeom.split('Z')[0] + wktGeom.split('Z')[1]
            wktGeom = wktGeom.replace(" 0,", ",")
            wktGeom = wktGeom.replace(" 0)", ")")
        # SRID 2154 -- presumably Lambert-93, the project CRS; confirm.
        geom = "GeomFromText('"
        geom += wktGeom
        geom += "', 2154)"

        geomObj = {}
        geomObj['the_geom'] = geom
        
        # NOTE(review): opens and commits a *fresh* connection per polygon,
        # distinct from self.db used by run() -- confirm this is intended,
        # since run()'s rollback cannot undo these per-polygon commits.
        db = DbManager(CarhabLayerRegistry.instance().getCurrentCarhabLayer().dbPath)
        
        r = Recorder(db, 'polygon')
        r.input(geomObj)
        
        db.commit()
        db.close()

    def run(self ):
        """Thread entry point: import every polygon feature of self.layer,
        emitting progress along the way and finished(success, code) at the end."""
        print 'run worker'
        try:
            print 'begin try'
            if CarhabLayerRegistry.instance().getCurrentCarhabLayer():
                
                # Connect to db.
                self.db = DbManager(CarhabLayerRegistry.instance().getCurrentCarhabLayer().dbPath)
                
                # To let import geos invalid geometries.
                self.layer.setValid(True)
                layerFeatCount = self.layer.featureCount()
                
                # Initialisation of useful variables to calculate progression.
                lastDecimal = 0
                i = 0
                
                for feature in self.layer.getFeatures():
                    if not self.stop:
                        featGeom = feature.geometry()
                        if featGeom.type() == 2: # Polygons case
                            if featGeom.isMultipart(): # Split multipolygons
                                for part in featGeom.asGeometryCollection():
                                    if not part.isGeosValid(): # May be a problem...
                                        print 'part not valid'
                                    self.insertPolygon(part)
                            else:
                                self.insertPolygon(feature.geometry())
                            
                            # Calculate and emit new progression value (each percent only).
                            newDecimal = int(100*i/layerFeatCount)
                            if lastDecimal != newDecimal:
                                self.progress.emit(newDecimal)
                                lastDecimal = newDecimal
                            i = i + 1

                    else: # Thread has been aborted
                        print 'abort'
                        # Cancel inserts already done
                        self.db.conn.rollback()
                        self.finished.emit(False, 2)
                        break
                self.db.commit()
                self.db.close()
                self.finished.emit(True, 0)
            else: # None current carhab layer (error code 1)
                self.finished.emit(False, 1)
            
        except:
            print 'exception'
            import traceback
            print traceback.format_exc()
            self.error.emit(traceback.format_exc())
            self.finished.emit(False, 0)
Exemple #29
0
def test_get_all_stations_by_param():
    """Stations measuring "CO" come back as a list with more than one entry."""
    db = DbManager()
    result = db.get_all_stations_by_param("CO")
    # isinstance is the idiomatic type check; `type(x) == list` rejects
    # subclasses and is flagged by linters (E721).
    assert isinstance(result, list)
    assert len(result) > 1
def track_whois(visitor_id, ip):
    """Link a visitor row to an organisation, scraping whois data on a miss.

    Looks up the organisation for *ip*; when unknown, scrapes whois and stores
    a new organisation record first. The visitor row is updated whenever an
    org id is available.

    :param visitor_id: id of the visitor table
    :param ip: visitor ip_address
    :return: nothing
    """
    dbman = DbManager()
    dbman.connect()
    try:
        org_id = dbman.find_org_by_ip(ip)
        if org_id is None:
            # Unknown IP: scrape whois; only record it when scraping succeeded.
            whois_tuple = WhoisScraper().scrape_whois(ip)
            if whois_tuple is not None:
                org_id = dbman.append_new_org(whois_tuple)
                dbman.update_visitor(visitor_id, org_id)
        else:
            dbman.update_visitor(visitor_id, org_id)
    finally:
        # Fixed: the connection leaked when scraping or a DB call raised.
        dbman.close()
Exemple #31
0
def test__get_data_by_stations_ids():
    """Two stations' worth of NO2 readings should yield at least 47 rows."""
    rows = DbManager().get_data_by_stations_ids([530, 531], "NO2")
    assert len(rows) >= 47
Exemple #32
0
from main_window import UiMainWindow
from PyQt5 import QtCore, QtGui, QtWidgets
from data_manager import DataManager
from db_manager import DbManager, DbManagerError
import logging_setup
import sys

if __name__ == "__main__":
    # Entry point: build the Qt application, wire the UI to the database
    # layer and enter the event loop.
    app = QtWidgets.QApplication(sys.argv)
    main_window = QtWidgets.QMainWindow()
    main_logger = logging_setup.get_logger("main_program")

    try:
        ui = UiMainWindow(DataManager(DbManager()), main_logger)
        ui.setup_ui(main_window)
        main_window.show()
    except DbManagerError as e:
        # NOTE(review): after a setup failure the event loop below still
        # starts with no visible window -- confirm this is intended.
        main_logger.exception(e)

    # The event loop runs regardless of setup success (original behavior).
    sys.exit(app.exec_())
Exemple #33
0
# coding=utf8

from db_manager import DbManager

# Shared registry of database connections/engines for the whole project.
databases = DbManager()


# ---------------- Newly added databases are initialised here ----------------


# For a newly added database: first add its settings in db_config, then
# initialise it below, e.g.:  cps_conn = databases.get_one_coon('cps')
# Wherever it is needed, just do  'from db_manage.db_init import XXXX_conn'
# Usage:  XXXX_conn.query(<your_sql>)

cps_conn = databases.get_one_coon('cps')


# ------------- Only add below when an engine is actually needed -------------


# When you need to work with the engine directly, initialise it below,
# e.g.:  cps_engine = databases.get_one_engine('cps')
# Purpose:  pd.read_sql(<your_sql>, XXXX_engine)

cps_engine = databases.get_one_engine('cps')


if __name__ == '__main__':
    # Smoke test for the 'cps' connection.
    sql = 'SELECT * from biz.user_mmv_status LIMIT 2'
    results = cps_conn.query(sql)  # returns a list of tuples
Exemple #34
0
 def setup_db(self):
     """Create the database schema through a throwaway DbManager instance."""
     DbManager().setup_db()
Exemple #35
0
    def add_snippets(self, ranked_docs, query):
        """Build the result list for *ranked_docs*, attaching to each document
        its title and up to two snippet sentences most similar to *query*.

        Similarity is the cosine between TF-IDF vectors of the query terms
        and of each sentence. Documents missing from the database, or with no
        sentences, are skipped.

        :param ranked_docs: iterable of (doc_id, score) pairs.
        :param query: raw query string.
        :return: list of dicts with keys docId, score, title, snippets.
        """
        dbManager = DbManager()
        builder = StructureBuilder()
        docs_with_snippets = []
        tf_idf_q_terms = {}
        q_terms = builder.get_stemmed_tems(query)

        # Pre-compute the TF-IDF weight of every query term.
        for q_term in q_terms:
            # Number of documents in the collection in which q_term appears
            # at least once.
            n_docs_q_term = len(self.q_terms_freqs[q_term]
                                ) if q_term in self.q_terms_freqs else 0
            if n_docs_q_term != 0:
                freq_d = q_terms.count(q_term)
                max_q_freq = self.get_local_max_freq(q_terms)
                tf_idf_q_terms[q_term] = self.calc_tf_idf(
                    freq_d, max_q_freq, self.docs_count, n_docs_q_term)
            else:
                tf_idf_q_terms[q_term] = 0

        for ranked_doc in ranked_docs:
            doc_id = ranked_doc[0]
            docs_relevant_scores = {}
            doc = dbManager.get_document(doc_id)
            if doc is None:  # fixed: identity comparison with None
                continue
            sentences = self.get_doc_sentences(doc)
            if not sentences:
                # Guard: a document without sentences has no title either
                # (the original crashed on pop(0)).
                continue
            # The first sentence is treated as the document title.
            title = sentences.pop(0)['content']

            for sentence in sentences:
                sentence_content = sentence['content']
                # A sentence of <= 2 characters is probably not a real sentence.
                if len(sentence_content) <= 2:
                    continue
                sentence_id = sentence['id']
                tf_idf_sum = 0
                denom_di_sum = 0
                denom_qi_sum = 0
                index_sentence = builder.get_stemmed_terms_frequencies_from_doc(
                    sentence)
                for q_term in q_terms:
                    if q_term in index_sentence.Terms:
                        q_sentence_freq = index_sentence.get_term_freq(q_term)
                        max_freq = index_sentence.get_max_freq()
                        # No frequency information: this q_term contributes
                        # nothing to the cosine, skip it.
                        if (q_sentence_freq == 0 and max_freq == 0):
                            continue

                        tf_idf_doc = self.calc_tf_idf(
                            q_sentence_freq, max_freq, self.docs_count,
                            len(self.q_terms_freqs[q_term]))
                        tf_idf_q = tf_idf_q_terms[q_term]

                        # Accumulate cosine-similarity terms, TF-IDF weighted.
                        tf_idf_sum += tf_idf_doc * tf_idf_q
                        denom_di_sum += tf_idf_doc**2
                        denom_qi_sum += tf_idf_q**2

                denom = math.sqrt(denom_di_sum) * math.sqrt(denom_qi_sum)
                score = tf_idf_sum / denom if denom != 0 else 0
                docs_relevant_scores[sentence_id] = round(score, 3)

            # Pick the two highest-scoring sentences as snippets.
            sorted_docs_total_freqs = sorted(docs_relevant_scores.items(),
                                             key=operator.itemgetter(1),
                                             reverse=True)
            # Fixed: the original indexed top_sentences[1][0] unconditionally,
            # raising IndexError whenever fewer than two sentences were scored.
            top_ids = {s_id for s_id, _ in sorted_docs_total_freqs[0:2]}
            top_snippets = [
                s['content'] for s in sentences if s['id'] in top_ids
            ]

            docs_with_snippets.append({
                "docId": doc_id,
                "score": ranked_doc[1],
                "title": title,
                "snippets": top_snippets
            })
        return docs_with_snippets
Exemple #36
0
 def get_current_user_id(self):
     """Return the minimum user id, inserting a fresh row when needed."""
     return DbManager().insert_and_get_min_user_id()
Exemple #37
0
def save_to_database_task(property_data):
    """Persist a scraped property record through the database layer."""
    DbManager().add_new_property(property_data)
Exemple #38
0
# Telegram bot credentials -- placeholders, filled in at deploy time.
API_KEY = '<bot_api_key>'

# sendMessage endpoint template; bot key, channel and text are interpolated below.
URL = 'https://api.telegram.org/bot{BOT_API_KEY}/sendMessage?chat_id={CHANNEL_NAME}&text={MESSAGE_TEXT}'

CHANNEL_NAME = '<channel_name>'

# Russian notification text ("New question on the forum ... publish the answer
# here ..."). Runtime string -- deliberately left untranslated.
MSG_TEMPLATE = '''На форуме новый вопрос: "{title}"
Опубликуйте ответ тут: {link}'''

if __name__ == '__main__':
    print('Start')
    # Scrape the forum for posts; nothing to do when there are none.
    posts = get_content()
    if len(posts) == 0:
        exit(0)

    db = DbManager('test.db')
    db.connect()
    for post in posts:
        title = post['title']
        link = post['link']
        result = db.find_post(title)

        # Unseen post: persist it and build the notification request URL.
        if len(result) == 0:
            db.add_new_post(title, link)
            msg = MSG_TEMPLATE.format(title=title, link=link)
            url = URL.format(
                BOT_API_KEY=API_KEY,
                CHANNEL_NAME=CHANNEL_NAME,
                MESSAGE_TEXT=msg
            )
            # NOTE(review): `url` is built but never requested within this
            # snippet -- the actual send call appears to be truncated; confirm.
Exemple #39
0
from flask import Blueprint
from flask import Flask, abort, request, jsonify
from flask import jsonify
from flask import redirect
import json
import logging
from db_manager import DbManager

# print("URL Service")

# Verbose logging for development.
logging.basicConfig(level=logging.DEBUG)

app = Flask(__name__)

# Module-level database handle shared by all route handlers.
dbmgr = DbManager()
dbmgr.connect_db()

# url_service = Blueprint('url_api', __name__) # flask blueprints: https://flask.palletsprojects.com/en/1.1.x/blueprints/


#
#   geturlshrtn: returns the shortened url for a given url (internal)
#
@app.route('/geturlshrtn/<url_shrt_code>', methods=['GET'])
def getUrlShrtn(url_shrt_code):
    """Return the shortened-url record for *url_shrt_code* as JSON (internal)."""
    print("\n\n/geturlshrtn/ for: " + str(url_shrt_code))
    return jsonify(dbmgr.get_url_shrtn(url_shrt_code))
Exemple #40
0
class KanzumeFrame(wx.Frame):
    """
    メインパネル
    """

    def cap_btn_click_listener(self, event):
        # Run self.capture on a worker thread; self.done_capture receives the result.
        startWorker(self.done_capture, self.capture)

    def capture(self):
        """Worker-thread body: switch the UI into capture mode and run the
        packet capture; returns its result for done_capture()."""
        logging.info('パケットキャプチャ開始')  # "packet capture started"
        self.panel_capture.cap_btn.Disable()
        self.set_progress_value(0)
        self.panel_capture.text.Hide()
        self.panel_capture.progress.Show()
        self.SetStatusText("capture......")
        return self.pcap.startCapture()
    
    def done_capture(self, result):
        """Callback after the worker finishes: unpack the capture result,
        refresh the result panel and restore the UI state."""
        self.army_mng = result.get()
        self.panel_capture.text.Show()
        self.panel_capture.progress.Hide()
        if self.army_mng is not None:
            self.panel_result.updateResult(self.army_mng)
            self.SetStatusText("capture success!")
        else:
            self.SetStatusText("capture failed...")
        self.panel_capture.cap_btn.Enable()
        logging.info('パケットキャプチャ終了')  # "packet capture finished"

    def select_networkif(self, event):
        """Menu handler: remember the chosen network interface (keyed by menu
        id) both in the capture manager and in the database."""
        self.pcap.nif_mng.set_selected_devId(event.GetId())
        self.dbm.update_nwif(event.GetId())

    def set_text(self, text):
        # Delegate status text updates to the capture panel.
        self.panel_capture.set_text(text)

    def set_progress_value(self, value):
        # Delegate progress-bar updates to the capture panel.
        self.panel_capture.set_progress_value(value)
        
    def __init__(self):
        wx.Frame.__init__(self, None, wx.ID_ANY, u'艦ZUME', size=(450, 500))
        self.isCaputre = False
        
        # アイコン
        icon = wx.Icon('./data/img/kanzume.ico', wx.BITMAP_TYPE_ICO)
        self.SetIcon(icon)
        
        # パケットキャプチャ準備
        self.pcap = PacketCapture()
        self.pcap.set_text(self.set_text)
        self.pcap.set_progress_value(self.set_progress_value)
        self.army_mng = None

        # データベース準備
        self.dbm = DbManager()
        
        # ステータスバーの初期化
        self.CreateStatusBar()
        self.SetStatusText(u'Let\'s capture!')
        self.GetStatusBar().SetBackgroundColour(None)

        # メニューバーの初期化
        selected_nwif_index = self.dbm.get_selected_nwif_index()
        self.pcap.nif_mng.set_selected_devId(selected_nwif_index)
        menuBar = CaptureMenuBar(self.pcap.nif_mng.get_networkif_listf())
        menuBar.set_selected_nwif(selected_nwif_index)
        self.SetMenuBar(menuBar)
        self.Bind(wx.EVT_MENU, self.select_networkif)

        # パネル
        panel_base = wx.Panel(self, wx.ID_ANY)
        self.panel_capture = CapturePanel(panel_base)
        self.panel_result = ResultPanel(panel_base, self.dbm)

        # クリックリスナ登録
        self.panel_capture.set_on_click_listener(self.cap_btn_click_listener)

        layout = wx.BoxSizer(wx.VERTICAL)
        layout.Add(self.panel_capture, flag=wx.EXPAND | wx.ALL, border=10)
        layout.Add(self.panel_result, proportion=1, flag=wx.EXPAND)
        panel_base.SetSizer(layout)
        layout.Fit(panel_base)