Code example #1
File: parsing_argo_photo.py  Project: gora5624/Python
import os

import requests
from bs4 import BeautifulSoup

# get_html, read_csv and write_csv are assumed to come from the project's
# own my_lib helper module (example #12 imports write_csv from it).
from my_lib import get_html, read_csv, write_csv


def get_image(price):
    price_argo = read_csv(price)
    list_image_dir = []
    suffix = 1
    for stuff in price_argo:
        html = get_html(stuff['url_stuff'])
        soup = BeautifulSoup(html, 'lxml')
        try:
            image_list = soup.find('div', id='product-gallery').find_all('a')
        except AttributeError:
            image_list = soup.find('div', class_='image-border').find_all('a')
        # Collect unique image URLs; stop at the first duplicate, since the
        # gallery repeats the same set of links.
        image_list_url = []
        for image in image_list:
            url_image = image.get('href')
            if url_image in image_list_url:
                break
            image_list_url.append(url_image)
        dir_for_image = os.path.join(
            r'D:\tmp\python\python_parsing\parsing_ARGO', 'image',
            stuff['category_name'],
            stuff['name_stuff']).replace(' ', '_').replace('"', '')
        # Disambiguate duplicate product names with a numeric suffix.
        while dir_for_image in list_image_dir:
            dir_for_image = '{}_{}'.format(dir_for_image, suffix)
            suffix += 1
        os.makedirs(dir_for_image)
        list_image_dir.append(dir_for_image)
        # n rather than i, so the loop index cannot clobber the suffix counter.
        for n, image in enumerate(image_list_url):
            if image == '':
                continue
            with open(os.path.join(dir_for_image, '{}.jpg'.format(n)),
                      'wb') as image_file:
                image_file.write(requests.get(image).content)
        write_csv(stuff, os.path.join(dir_for_image, 'info.csv'))
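
Every example on this page calls get_html, read_csv and write_csv without defining them; example #12 imports write_csv from the project's my_lib module, so the others presumably resolve the same way. A minimal sketch of what such a module might look like, assuming requests for fetching and csv.DictWriter with append-per-row semantics; the real implementation in gora5624/Python is not shown here:

import csv
import os

import requests


def get_html(url):
    # Fetch a page and return its markup as text.
    response = requests.get(url)
    response.raise_for_status()
    return response.text


def read_csv(path):
    # Load a CSV written by write_csv back into a list of dicts.
    with open(path, encoding='utf-8', newline='') as f:
        return list(csv.DictReader(f))


def write_csv(data, path):
    # Append one dict as a CSV row, emitting the header only when the
    # file does not exist yet.
    new_file = not os.path.exists(path)
    with open(path, 'a', encoding='utf-8', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(data))
        if new_file:
            writer.writeheader()
        writer.writerow(data)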
Code example #2
def GetFilmCatalog(ListData, num):  # num is unused; kept to match the caller
    NameFile = r'D:\tmp\my_prod\Python\python\ParsingMobi711\MobiParsBk_mp.csv'
    for Data in ListData:
        html = get_html('https://mobi711.ru' + Data['Url'])
        soup = BeautifulSoup(html, 'lxml')
        # find_all returns an empty list when nothing matches, so no
        # try/except is needed around it.
        DivUrlFilmCatalog = soup.find_all('div',
                                          class_='category no-description')
        UrlFilmCatalog = ''
        for Div in DivUrlFilmCatalog:
            Link = Div.find('div', class_='text').find('a')
            # 'Защитные пленки и стекла' = "Protective films and glasses"
            if Link.text == 'Защитные пленки и стекла':
                UrlFilmCatalog = 'https://mobi711.ru' + Link.get('href')
                break
        data = {
            'Brand': Data['Brand'],
            'Model': Data['Model'],
            'UrlModel': Data['Url'],
            'UrlFilmCatalog': UrlFilmCatalog
        }

        write_csv(data, NameFile)
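
The rows in ListData carry the same Brand/Model/Url keys that GetListBrandAndModels (example #9) writes to Mobi711.csv, so the two functions most likely chain through that file. A hedged driver sketch; the listing URL is a guess, and num is unused by GetFilmCatalog:

# Hypothetical glue code feeding example #9's output into example #2,
# using the read_csv helper sketched after example #1.
GetListBrandAndModels('https://mobi711.ru')  # listing URL is an assumption
GetFilmCatalog(read_csv('Mobi711.csv'), 0)   # num is ignored inside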
Code example #3
def get_stuff_info(list_stuff):
    for stuff in list_stuff:
        url_stuff = stuff['url_stuff']
        html_stuff = get_html(url_stuff)
        soup = BeautifulSoup(html_stuff, 'lxml')
        text_info = soup.find('div', id='test').find_all('li')
        size, color, cloth = '', '', ''
        try:
            price = soup.find('div', class_='retail-price price-prod').text
        except AttributeError:
            price = ''
        for text_ in text_info:
            cur_line_name = text_.find('span', class_='opts-lab').text
            cur_line_val = text_.find('span', class_='opts-val').text
            if cur_line_name == 'Размер:':    # "Size:"
                size = cur_line_val
            elif cur_line_name == 'Цвет:':    # "Color:"
                color = cur_line_val
            elif cur_line_name == 'Ткань:':   # "Fabric:"
                cloth = cur_line_val
        data_stuff = {
            'category_name': stuff['category_name'],
            'name_stuff': stuff['name_stuff'],
            'url_stuff': stuff['url_stuff'],
            'size': size,
            'color': color,
            'cloth': cloth,
            'price': price
        }
        print(data_stuff)
        write_csv(data_stuff,
                  r'D:\tmp\my_prod\Python\python\parsing_ARGO\argo.csv')
Code example #4
def get_information():
    # This fragment relies on module-level globals (name_stuff, stuff_url,
    # size, color, cloth) and on driver_new, apparently a Selenium
    # WebDriver, all of which are set up elsewhere in the script.
    data = {
        'name_stuff': name_stuff,
        'url_stuff': stuff_url,
        'size': size,
        'color': color,
        'cloth': cloth
    }
    driver_new.quit()
    print(data)
    write_csv(data, r'D:\tmp\my_prod\Python\python\parsing_ARGO\argo.csv')
Code example #5
File: FileListInFolder.py  Project: gora5624/Python
def main():
    # Prompt: "Enter the folder path: ". Apply the default before calling
    # abspath: os.path.abspath('') already resolves to the current
    # directory, so an empty-string check after abspath can never fire.
    path = input('Введите путь к папке: ')
    if path == '':
        path = os.curdir
    path = os.path.abspath(path)
    # Prompt: "Enter the path and name of the results file: "
    FileName = input('Введите путь и название файла с результатами: ')
    if FileName == '':
        FileName = 'ListFile.csv'
    for line in os.listdir(path):
        # 'Имя файла' = "File name" (the CSV column header)
        Data = {'Имя файла': line}
        write_csv(Data, FileName)
Code example #6
File: tablet.py  Project: gora5624/Python
def GetData(brand, model, img, mainUrl):
    if img['nameImg'] is not None:
        imgDirectory = os.path.join(brand['nameBrand'], model['nameModel'],
                                    img['nameImg'] + '.jpg')
    else:
        imgDirectory = 'None'
    dataCsv = {'Brand': brand['nameBrand'],
               'Model': model['nameModel'],
               'Img Directory': imgDirectory}
    if img['nameImg'] is not None:
        pathImg = os.path.join(
            'tablets', brand['nameBrand'], model['nameModel'])
        os.makedirs(pathImg, exist_ok=True)
        with open(os.path.join(pathImg, img['nameImg'] + '.jpg'), 'wb') as imageFile:
            # The with block closes the file; no explicit close() is needed.
            imageFile.write(requests.get(mainUrl + img['urlImg']).content)
    write_csv(dataCsv, 'Tablet.csv')
Code example #7
def get_stuff_info(list_stuff_on_site):
    for stuff in list_stuff_on_site:
        html = get_html(stuff['stuff_url'])
        # Debug dump of the last fetched page; the with block closes the file.
        with open('0.html', 'w', encoding='utf-8') as f:
            f.write(html)
        soup = BeautifulSoup(html, 'lxml')
        try:
            descr = soup.find('div', id='tab-description').text
        except AttributeError:
            descr = ''
        try:
            atr = soup.find(
                'div', id='tab-specification').find('div', class_='attribute').text
        except AttributeError:
            atr = ''
        try:
            size_span = soup.find('div', class_='option row').find(
                'tbody').find_all('span', class_='size-title')
        except AttributeError:
            size_span = []
        # Comma-separated list of available sizes.
        size = ','.join(size_tmp.text for size_tmp in size_span)
        data = {'catalog_name': stuff['catalog_name'],
                'stuff_name': stuff['stuff_name'],
                'stuff_url': stuff['stuff_url'],
                'descr': descr.replace('\n', ' ').replace('\r', ' '),
                'size': size,
                'atr': atr.replace('\n', ' ').replace('\r', ' ')}
        main_dir = r'D:\tmp\python\python_parsing\parsing_ck_textil'
        write_csv(data, os.path.join(main_dir, 'ck_textil.csv'))
        try:
            image_tag_a_list = soup.find(
                'div', class_='MagicToolboxSelectorsContainer').find_all('a')
        except AttributeError:
            image_tag_a_list = soup.find(
                'div', class_='MagicToolboxContainer selectorsBottom minWidth').find_all('a')
        image_dir = os.path.join(
            main_dir, stuff['catalog_name'], stuff['stuff_name']).replace(' ', '_')
        os.makedirs(image_dir)
        for i, image_tmp in enumerate(image_tag_a_list):
            # hrefs are protocol-relative ('//host/path'), so prepend https://
            image_url = 'https://' + image_tmp.get('href')[2:]
            with open(os.path.join(image_dir, '{}.jpg'.format(i)), 'wb') as file:
                file.write(requests.get(image_url).content)
        write_csv(data, os.path.join(image_dir, 'info.csv'))
Code example #8
def get_stuff_on_page(page_catalog_url):
    list_stuff_on_page = []
    html = get_html(page_catalog_url)
    soup = BeautifulSoup(html, 'lxml')
    list_stuff = soup.find_all('div', class_='product-thumb transition')
    catalog_name = soup.find('h1').text
    for stuff in list_stuff:
        url = stuff.find('div', class_='caption').find('a').get('href')
        stuff_name = stuff.find(
            'div', class_='caption').find('a').text.replace('"', '')
        data = {'catalog_name': catalog_name,
                'stuff_name': stuff_name,
                'stuff_url': url}
        list_stuff_on_page.append(data)
        write_csv(data, 'ck.csv')
    return list_stuff_on_page
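
Examples #7 and #8 read like two stages of one pipeline: get_stuff_on_page collects the products of a catalog page, and get_stuff_info (example #7) then visits each of them. A minimal driver sketch; the catalog URL below is a placeholder, not taken from the project:

# Hypothetical glue code chaining example #8 into example #7.
catalog_url = 'https://example.com/catalog/bed-linen'  # placeholder URL
stuff_list = get_stuff_on_page(catalog_url)
get_stuff_info(stuff_list)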
Code example #9
def GetListBrandAndModels(mainUrl):
    html = get_html(mainUrl)
    soup = BeautifulSoup(html, 'lxml')
    ListBrandDiv = soup.find_all('div', class_='category-wrap')
    for Div in ListBrandDiv:
        BrandName = Div.find('div', class_='text').find('a').text
        try:
            ListModelsA = Div.find('div', class_='sub').find_all('a')
        except AttributeError:
            ListModelsA = []
        for A in ListModelsA:
            Text = A.text
            # Skip the 'Показать еще' ("Show more") pager link instead of
            # writing a duplicate row for it.
            if Text == 'Показать еще':
                continue
            data = {'Brand': BrandName, 'Model': Text, 'Url': A.get('href')}
            write_csv(data, 'Mobi711.csv')
Code example #10
def get_all_stuff(list_stuff):
    list_dir = []
    for stuff in list_stuff:
        html = get_html(stuff['stuff_url'])
        soup = BeautifulSoup(html, 'lxml')
        # Debug dump of the last fetched page; the with block closes the file.
        with open('0.html', 'w', encoding='utf-8') as file:
            file.write(html)
        try:
            descr = soup.find(
                'div',
                class_='block-text block-type-catalogitem-text textcontent'
            ).find('p').text
        except AttributeError:
            descr = ''
        try:
            url_image = soup.find('div',
                                  class_='block-picture').find('a').get('href')
        except AttributeError:
            # No product picture; skip this item entirely.
            continue
        catalog_name = stuff['catalog_name']
        stuff_name = stuff['stuff_name'].replace('"', '')
        stuff_url = stuff['stuff_url']
        data = {
            'catalog_name': catalog_name,
            'stuff_name': stuff_name,
            'stuff_url': stuff_url,
            'stuff_descr': descr
        }
        main_dir = r'D:\tmp\python\python_parsing\parsing_formateks'
        new_dir = os.path.join(main_dir, catalog_name,
                               stuff_name).replace(' ', '_').lower()

        # Disambiguate duplicate product names with a numeric suffix.
        base_dir = new_dir
        n = 1
        while new_dir in list_dir:
            new_dir = '{}_{}'.format(base_dir, n)
            n += 1
        list_dir.append(new_dir)
        os.makedirs(new_dir)
        write_csv(data, os.path.join(new_dir, 'info.csv'))
        write_csv(data, os.path.join(main_dir, 'formarket.csv'))
        with open(os.path.join(new_dir, '0.jpg'), 'wb') as file:
            file.write(requests.get(url_image).content)
Code example #11
def get_stuff_on_page(url_catalog):
    for item in url_catalog:
        for page in range(item['max_page']):
            if page == 0:
                html = get_html(item['url'])
            else:
                html = get_html(item['url'] + '/page-{}'.format(page + 1))
            soup = BeautifulSoup(html, 'lxml')
            # Debug dump of the last fetched page; the with block closes
            # the file, so no explicit close() is needed.
            with open('0.html', 'w', encoding='utf-8') as f:
                f.write(html)

            stuff_div = soup.find(
                'div', class_='catalog-collection cleared').find_all('h3')
            for stuff in stuff_div:
                stuff_url = 'http://formateks.ru' + stuff.find('a').get('href')
                stuff_name = stuff.find('a').text
                data = {
                    'catalog_name': item['catalog_name'],
                    'stuff_name': stuff_name,
                    'stuff_url': stuff_url
                }
                write_csv(data, 'formarket.csv')
Code example #12
File: tmp.py  Project: gora5624/Python
import os

from my_lib import write_csv

# Write one CSV row per file in the folder, stripping the .jpg extension.
file_list = os.listdir(r'D:\Done\Antyspy')
for file_name in file_list:
    data = {'name': file_name.replace('.jpg', '')}
    write_csv(data, 'tmp.csv')