コード例 #1
0
def main():
    """Predict parcel prices for every classification bucket and append them to a CSV file.

    Reads the module-level settings ``predict_prices_using_price_parameters``,
    ``classification_buckets``, ``limit_date``, ``parcel_prices_mapping`` and
    ``excluded_values``. For each bucket the matching Keras model is loaded,
    the parcels to value are fetched through a stored procedure, and one
    ``[OBJECTID, predicted_price]`` row per parcel is appended to the output
    file. Negative predictions are written as 0.

    The database connection is always closed, even when an error occurs.
    """
    database_handler = DatabaseHandler()
    # The output file and the stored procedure both depend only on this
    # switch, so resolve them once instead of once per bucket.
    if predict_prices_using_price_parameters:
        filename = 'estimated_prices.csv'
        procedure = 'GetDataToParcelsValuation'
    else:
        filename = 'estimated_prices_based_on_no_price_parameters.csv'
        procedure = 'GetDataToParcelsValuationWithoutPriceParameters'
    try:
        # newline='' is what the csv module requires for files handed to
        # csv.writer; without it, platforms that translate '\n' emit an extra
        # '\r' and every row is followed by a blank line.
        with open(filename, mode='a', newline='') as estimated_prices_file:
            estimated_prices_writer = csv.writer(estimated_prices_file,
                                                 delimiter=',',
                                                 quotechar='"',
                                                 quoting=csv.QUOTE_MINIMAL)
            for bucket in classification_buckets:
                model = keras_load_model(get_model_filename_b(bucket))
                # NOTE(review): the statement is assembled with str.format;
                # this is only safe while every value comes from trusted
                # configuration - confirm, or switch to bound parameters if
                # DatabaseHandler supports them.
                query = ("EXEC dbo.{} "
                         "@LimitDate = {}, "
                         "@BucketType={}, "
                         "@ExcludedList='{}'").format(
                             procedure, limit_date,
                             parcel_prices_mapping[bucket],
                             excluded_values)
                df_parcels_to_valuation = database_handler.execute_query(query)
                data_size = df_parcels_to_valuation.shape[1]
                # skip the first attribute - OBJECTID and the last attribute -
                # Sale_Amount, which will be predicted
                x = df_parcels_to_valuation.iloc[:, 1:data_size - 1]
                prediction = model.predict(x)

                for prediction_value, object_id in zip(
                        prediction, df_parcels_to_valuation['OBJECTID']):
                    # Clamp negative predictions to zero on a local copy so
                    # the model output array is not modified in place.
                    price = prediction_value[0]
                    if price < 0:
                        price = 0
                    estimated_prices_writer.writerow(
                        [object_id, np.uint64(round(price, 0))])
    finally:
        database_handler.close_connection()
コード例 #2
0
def recommendations(movies_watched, df):
    """Return up to ten titles from ``df`` ranked by similarity to watched movies.

    For every watched title the corresponding row of the similarity matrix is
    added to a running score. If the stored user rating for a title is ``-1``
    the row is inverted (``1 - similarity``) before it is added, so movies
    similar to a disliked one are pushed down. Already watched movies are
    removed from the result.

    Args:
        movies_watched: iterable of titles the user has already seen.
        df: DataFrame whose index holds the movie titles.

    Returns:
        A list with at most ten entries taken from ``df.index``.
    """
    cos_sim = find_similarity(df)
    indexes = []
    # Start from a zero Series so the per-movie scores can be accumulated
    # with Series.add(..., fill_value=0).
    summed_score_series = pd.Series(0, dtype="float64")
    # TO-DO: Handle case if for some reason no movie is in the list
    db = DatabaseHandler("user1")
    try:
        for title in movies_watched:
            # Find index for the movie
            index = index_from_title(df, title)
            # Save all the indexes for the movies that the user has seen
            # (used for filtering later)
            indexes += indexes_from_title(df, title)
            # Similarity of this movie to every other title.
            # NOTE(review): the ``- 1`` presumably converts a 1-based index
            # into a row position - confirm against index_from_title.
            score_series = pd.Series(cos_sim[index - 1])
            # Fetch user rating and invert the similarity values if the user
            # did not like the movie
            rating = db.get_rating(title)
            print(f"{title} has rating: {rating}")
            if rating == -1:
                score_series = score_series.apply(lambda x: 1 - x)
            # Aggregate the similarity scores for all movies previously seen
            summed_score_series = summed_score_series.add(score_series,
                                                          fill_value=0)
    finally:
        # Release the connection even when a lookup above raises.
        db.close_connection()

    # Sort the series with the most similar one at index 0
    summed_score_series = summed_score_series.sort_values(ascending=False)
    # Take enough top entries that ten remain after the seen ones are removed.
    # TO-DO: Handle if the dataframe contains fewer movies than we expect
    top_indexes = list(summed_score_series.iloc[0:(10 + len(indexes))].index)
    # Materialise the titles once instead of once per recommended movie.
    titles = list(df.index)
    movies = [titles[n] for n in top_indexes if n not in indexes]
    return movies[0:10]
コード例 #3
0
class Main:
    """Look up houses on reformagkh.ru and store the scraped details in the database."""

    def __init__(self):
        """Open the database and fill it with the initial data.

        Raises:
            RuntimeError: if ``DatabaseHandler.fill_database`` reports failure.
        """
        self.request_count = 0
        self.dh = DatabaseHandler('database')
        if not self.dh.fill_database():
            raise RuntimeError("Error during filling the database")

    def request_house_profile(self, addr: tuple, simple: bool = True):
        """Search the site for one address and record the outcome.

        The first five items of ``addr`` are used to build the search query,
        ``addr[-1]`` is passed to the database handler to identify the row,
        and ``addr[-2]`` is the code that is incremented when nothing is
        found. When a profile link is found, the parsed house data is
        inserted and the row is updated with ``code=-1``.

        Nothing is done when ``simple`` is false.
        """
        if not simple:
            return

        url = ('https://www.reformagkh.ru/search/houses?query='
               '{}+{}+{}+{}+{}&mh=on'.format(*addr)) \
            .replace('.0', '').replace(' ', '+')
        r = requests.get(url)
        # The request has to be re-issued after each pause; otherwise a
        # single 403 response would keep this loop sleeping forever.
        while r.status_code == 403:
            print('Connection refused. Wait for 30 sec')
            time.sleep(30)
            r = requests.get(url)

        m = re.search(r'/myhouse/profile/view/[0-9]+', r.text)
        if m:
            self.dh.insert_result(
                self.request_house_info(m.group(0)) + (addr[-1], ))
            self.dh.update(addr[-1], code=-1)
        else:
            self.dh.update(addr[-1], code=addr[-2] + 1)

    def request_house_info(self, profile_url: str) -> tuple:
        """Download a house profile page and return its parsed fields.

        Args:
            profile_url: site-relative path of the profile page.

        Returns:
            The tuple produced by :meth:`parse_house_info`.

        Raises:
            AttributeError: if an expected field is missing from the page.
        """
        url = 'https://www.reformagkh.ru{}'.format(profile_url)
        r = requests.get(url)
        # The patterns rely on '.' spanning the whole page, so replace the
        # newlines that '.' would otherwise stop at.
        text = ' '.join(r.text.split('\n'))
        return self.parse_house_info(text)

    @staticmethod
    def parse_house_info(text: str) -> tuple:
        """Extract the house attributes from the single-line text of a profile page.

        Returns:
            ``(year, stages, last_change, series, building_type, house_type,
            is_wreck, cadaster_number, overlapping_type, wall_material)``.
            ``building_type`` is the same value as ``series`` and ``is_wreck``
            is 1 when the page says 'Да', otherwise 0. All other items are
            stripped strings.

        Raises:
            AttributeError: if any of the expected fields is not found
                (``re.search`` returns ``None``).
        """

        def field(pattern: str, group: str) -> str:
            # Stripped text of the named group; fails when there is no match.
            return re.search(pattern, text).group(group).strip()

        year = field(
            r'Год ввода дома в эксплуатацию.*?<span>(?P<year>.*?)</span>',
            'year')
        stages = field(
            r'Количество этажей.*?<span>наибольшее.*?'
            r'<span>(?P<stages>.*?)</span>',
            'stages')
        # TODO change date format
        # Collapse runs of whitespace inside the timestamp to single spaces.
        last_change = ' '.join(field(
            r'Последнее изменение анкеты.*?'
            r'<span class="black_text">(?P<last_change>.*?)</span>',
            'last_change').split())
        series = field(
            r'Серия, тип постройки здания.*?<span>(?P<series>.*?)</span>',
            'series')
        # The page exposes a single value for both columns.
        building_type = series
        house_type = field(
            r'Тип дома.*?<span>(?P<house_type>.*?)</span>',
            'house_type')
        is_wreck = 1 if field(
            r'Дом признан аварийным.*?<span>(?P<is_wreck>.*?)</span>',
            'is_wreck') == 'Да' else 0
        cadaster_number = field(
            r'Кадастровый номер.*?10px;">(?P<cadaster_number>.*?)</td>',
            'cadaster_number')
        overlapping_type = field(
            r'Тип перекрытий.*?<span>(?P<overlapping_type>.*?)</span>',
            'overlapping_type')
        wall_material = field(
            r'Материал несущих стен.*?<span>(?P<wall_material>.*?)</span>',
            'wall_material')

        return (
            year,
            stages,
            last_change,
            series,
            building_type,
            house_type,
            is_wreck,
            cadaster_number,
            overlapping_type,
            wall_material,
        )

    def run(self):
        """Process the addresses from the database repeatedly until interrupted.

        Each pass reads the addresses from ``database_reader`` and requests
        them one by one with a three second pause in between. On Ctrl+C the
        summary reports are printed and the connection is closed.
        """
        while True:
            try:
                self.request_count += 1

                print('Request #{}'.format(self.request_count))
                print('-' * 16)
                dr = self.dh.database_reader()
                for addr in dr:
                    self.request_house_profile(addr)
                    # Pause between requests.
                    time.sleep(3)
            except KeyboardInterrupt:
                print('\nResults:')
                self.dh.check_found()
                self.dh.count_brick_houses()
                self.dh.found_max_stages()
                print('\nBye.')
                self.dh.close_connection()
                break