Exemple #1
0
# Timezone the publication is in.
pub_tz = "US/Eastern"

# Should files be moved on success?
# None means "no"; any other value means "yes".
move_on_success = None

# Directory that holds successfully handled files
# (a subdirectory of the data directory).
success_dir = "success/"

########### Load libraries
import parserfunctions
import re
from bs4 import BeautifulSoup

### Grab the information from our configuration file
# NOTE(review): `pubshort` and `pattern` are not assigned anywhere in the
# visible part of this script; they must already be defined before this point.
config = parserfunctions.load_config()
homepages_dir = parserfunctions.homepages_dir(pubshort)
# Compiled once here, before the per-file loop below.
link_pattern = re.compile(pattern)

### Establish our MySQL Connection (for logging, etc.)
# create_mysql_conn() returns four values that are unpacked in one go --
# presumably a connection, a cursor, and the names of the data and log
# tables; confirm against parserfunctions.
conn, cur, mysql_table_name, mysql_log_name = parserfunctions.create_mysql_conn(
    config)

### Create directory for success, if appropriate
# move_on_success is handed straight to the helper; presumably the helper
# itself decides whether a directory is needed -- TODO confirm.
parserfunctions.create_success_dir(pubshort, homepages_dir, move_on_success)

### Get list of files to parse
# The helper returns both the list and its length; the length is only used
# for the progress message in the visible code.
file_list, file_list_len = parserfunctions.get_file_list(
    pubshort, homepages_dir)
# Progress counter, starting at 1 so the message reads "1 of N".
i = 1

### For each desktop homepage
for homepage in file_list:
    # Progress line: current file, its position, the total, and the publication.
    print("Opening file %s (%s of %s for %s)" %
          (homepage, i, file_list_len, pubshort))
    # Advance the counter so the next iteration reports the next position.
    i += 1
Exemple #2
0
# Should the desktop pages be processed?
# None means "no"; any other value means "yes".
process_desktop = 1

# Should files be moved on success?
# None means "no"; any other value means "yes".
move_on_success = None

# Directory that holds successfully handled files
# (a subdirectory of the data directory).
success_dir = "success/"

########### Load libraries
import parserfunctions
import re
from bs4 import BeautifulSoup

### Grab the information from our configuration file
# NOTE(review): `pubshort` and `pattern` are not assigned anywhere in the
# visible part of this script; they must already be defined before this point.
config = parserfunctions.load_config()
homepages_dir = parserfunctions.homepages_dir(pubshort)
# Compiled once here, before the per-file loop below.
link_pattern = re.compile(pattern)

### Establish our MySQL Connection (for logging, etc.)
# create_mysql_conn() returns four values that are unpacked in one go --
# presumably a connection, a cursor, and the names of the data and log
# tables; confirm against parserfunctions.
conn, cur, mysql_table_name, mysql_log_name = parserfunctions.create_mysql_conn(config)

### Create directory for success, if appropriate
# move_on_success is handed straight to the helper; presumably the helper
# itself decides whether a directory is needed -- TODO confirm.
parserfunctions.create_success_dir(pubshort, homepages_dir, move_on_success)

########### Parse Desktop Pages
# The switch is compared against None specifically, so any other value
# (including 0 or False) still enables this section.
if process_desktop is not None:
    ### Get list of files to parse
    # The helper returns both the list and its length; the length is only
    # used for the progress message in the visible code.
    file_list, file_list_len = parserfunctions.get_file_list(pubshort, homepages_dir)
    # Progress counter, starting at 1 so the message reads "1 of N".
    i = 1
    
    ### For each desktop homepage
    for homepage in file_list:
        # Progress line: current file, its position, the total, and the publication.
        print("Opening file %s (%s of %s for %s)" % (homepage, i, file_list_len, pubshort))
        # Advance the counter so the next iteration reports the next position.
        i += 1