Beispiel #1
0
def convert_fields(dict_data):
    """Flatten every dictionary's field definitions into the 'fields' sheet DataFrame.

    One output row per field, tagged with the owning dictionary's code.
    """
    config.log_message("Converting fields to Excel sheet...")

    field_columns = ('name', 'label', 'type', 'constraints', 'description')
    data = {'dictionary_code': []}
    for column in field_columns:
        data[column] = []

    for dictionary in dict_data:
        code = dictionary['code']
        for field in dictionary['fields']:
            data['dictionary_code'].append(code)
            for column in field_columns:
                data[column].append(field[column])

    df = pd.DataFrame(data=data)
    config.log_message("Done!")
    return df
Beispiel #2
0
def create_catalogue_section():
	"""Build the 'catalogue' section of the FAIR JSON from the catalogue sheet."""
	config.log_message("Converting the catalogue to JSON...")
	sheet = read_sheet('catalogue',{'key':str,'value':str}).dropna()

	supported_values=['title','description','creator','contactPoint','license','versionInfo','keyword','identifier','rights','publisher_name','publisher_url']
	catalogue_dict={}
	publisher={}
	for row in sheet.to_dict(orient="records"):
		key = row['key']
		value = row['value']
		if key not in supported_values:
			continue
		# Keys that need restructuring before they fit the target schema.
		if key == 'publisher_name':
			publisher['name'] = value
		elif key == 'publisher_url':
			publisher['url'] = value
		elif key == 'keyword':
			# Keywords are stored comma-separated in the sheet.
			catalogue_dict['keyword'] = value.split(",")
		else:
			catalogue_dict[key] = value

	catalogue_dict["publisher"] = publisher
	config.log_message("Done!")

	return catalogue_dict
Beispiel #3
0
def create_json_structure():
	"""Assemble the full FAIR JSON (configuration, catalogue, dictionaries) and write it to the output file."""
	config.log_message("Converting Excel to FAIR JSON...")
	json_dict=create_configuration_section()
	json_dict['catalogue']=create_catalogue_section()
	json_dict['dictionaries']=create_dictionary_section()

	json_content=json.dumps(json_dict, indent=4)

	# 'with' guarantees the handle is closed even if the write raises.
	# NOTE(review): mode "a" appends — rerunning against an existing output
	# file produces concatenated (invalid) JSON; confirm whether "w" was intended.
	with open(script_args.output_file, "a") as f:
		f.write(json_content)
	config.log_message("Conversion Complete. The FAIR JSON can be found in the file '" + str(script_args.output_file)+"'")
Beispiel #4
0
def write_to_excel(dataframes):
    """Write each DataFrame in *dataframes* (mapping sheet name -> DataFrame) into one Excel workbook."""
    config.log_message("Writing Excel sheets...")

    # Use the writer as a context manager: ExcelWriter.save() was deprecated
    # and removed in pandas 2.0; 'with' saves and closes the workbook even
    # if a sheet write raises.
    with pd.ExcelWriter(script_args.output_file, engine='xlsxwriter') as writer:
        for sheet_name, df in dataframes.items():
            df.to_excel(writer, sheet_name=sheet_name, index=False)

    config.log_message(
        "Conversion Complete. The Excel file can be found at '" +
        str(script_args.output_file) + "'")
Beispiel #5
0
def convert_configuration(configuration_data):
    """Reduce the configuration mapping to a two-column key/value DataFrame.

    Only the keys 'visibility', 'workflow_key' and 'code' are kept.
    """
    config.log_message("Converting configuration to Excel sheet...")

    supported = ('visibility', 'workflow_key', 'code')
    keys = [k for k in configuration_data if k in supported]
    values = [configuration_data[k] for k in keys]

    df = pd.DataFrame(data={'key': keys, 'value': values})
    config.log_message("Done!")
    return df
Beispiel #6
0
def create_configuration_section():
	"""Build the 'configuration' section of the FAIR JSON from the configuration sheet."""
	config.log_message("Converting the configuration to JSON...")
	sheet = read_sheet('configuration',{'key':str,'value':str}).dropna()

	supported = ('visibility', 'workflow_key', 'code')
	# Later rows with the same key overwrite earlier ones, as in a plain loop.
	configuration_dict = {
		row['key']: row['value']
		for row in sheet.to_dict(orient="records")
		if row['key'] in supported
	}

	config.log_message("Done!")

	return configuration_dict
Beispiel #7
0
def create_lookups_json(lookup_name, lookup_type):
	"""Build the JSON structure for one lookup: its type plus its option rows from the lookups sheet."""
	config.log_message("-- Converting lookup: '" + lookup_name + "' to JSON...")
	sheet = read_sheet('lookups',{'lookup':str,'name':str,'description':str})

	# Keep only rows belonging to the requested lookup.
	options_arr = [
		{'name': row['name'], 'description': str(row['description'])}
		for row in sheet.to_dict(orient="records")
		if row['lookup'] == lookup_name
	]
	return {'type': lookup_type, 'options': options_arr}
Beispiel #8
0
def convert_dictionaries(dict_data):
    """Collect each dictionary's code/name/description into the 'dictionaries' sheet DataFrame."""
    config.log_message("Converting dictionaries to Excel sheet...")

    data = {
        'code': [d['code'] for d in dict_data],
        'name': [d['name'] for d in dict_data],
        'description': [d['description'] for d in dict_data],
    }

    df = pd.DataFrame(data=data)
    config.log_message("Done!")

    return df
def process_report(text_content, html_content):
    """Deliver the report: mail it when method is 'mail', otherwise print it to the console.

    On any mail failure the report falls back to the console so it is never lost.
    """
    global server_connected
    global server

    if config.config['method'] == 'mail':

        # Body format follows the configured mail_format.
        if config.config['mail_format'] == 'html':
            message = MIMEText(html_content, 'html', 'utf-8')
        else:
            message = MIMEText(text_content, 'plain', 'utf-8')

        message['Subject'] = 'Movies torrents digest'
        message['From'] = config.config['from']
        message['To'] = config.config['to']

        # Reset the flag on every call: previously a failed connect could leave
        # a stale True from an earlier invocation and reuse a dead connection.
        server_connected = False
        try:
            server = smtplib.SMTP(config.config['smtp_server'])
            server_connected = True
        except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
            config.log_message("Unexpected error while connecting to mail server:" + str(sys.exc_info()[0]), 'error')
            config.log_message("Printing report to console\n")
            config.console_log("\n")
            config.console_log(text_content)

        if server_connected:
            try:
                server.ehlo()
                server.starttls()
                server.login(config.config['username'], config.config['password'])
                server.sendmail(config.config['from'], config.config['to'], message.as_string())
                server.quit()
                config.log_message("Report sent by mail")
            except Exception:
                config.log_message("Unexpected error while sending mail :" + str(sys.exc_info()[0]), 'error')
                config.log_message("Printing report to console")
                config.console_log("\n")
                config.console_log(text_content)
            finally:
                # Harmless after quit(); ensures the socket is released on failure.
                server.close()

    else:
        config.console_log("\n")
        config.console_log(text_content)
Beispiel #10
0
def create_dictionary_section():
	"""Build the 'dictionaries' JSON section: one entry per sheet row, each with its fields and lookups."""
	sheet = read_sheet('dictionaries',{'code':str,'name':str,'description':str})

	dicts_arr = []
	for row in sheet.to_dict(orient="records"):
		config.log_message("Converting dictionary '" + row['name'] + "' to JSON...")

		entry = {
			'code': row['code'],
			'name': row['name'],
			'description': row['description'],
		}
		entry['fields'], entry['lookups'] = create_fields_json(row['code'])
		dicts_arr.append(entry)

		config.log_message("Done!")
	return dicts_arr
Beispiel #11
0
def convert_catalogue(catalogue_data):
    """Flatten the catalogue mapping into key/value rows for the 'catalogue' sheet.

    'publisher' (a nested dict) and 'keyword' (a list) get special handling.
    """
    config.log_message("Converting catalogue to Excel sheet...")
    rows = []

    for key, value in catalogue_data.items():
        if key == 'publisher':
            # publisher is nested: emit name (always) and url (when present).
            rows.append(('publisher_name', value['name']))
            if 'url' in value.keys():
                rows.append(('publisher_url', value['url']))
        elif key == 'keyword':
            # keyword is a list: store it comma-joined.
            rows.append(('keyword', ','.join(value)))
        else:
            rows.append((key, value))

    data = {'key': [k for k, _ in rows], 'value': [v for _, v in rows]}
    df = pd.DataFrame(data=data)
    config.log_message("Done!")
    return df
Beispiel #12
0
def convert_lookups(dict_data):
    """Flatten every lookup option of every dictionary into the 'lookups' sheet DataFrame."""
    config.log_message("Converting lookups to Excel sheet...")

    # One (lookup, option name, option description) triple per option row.
    triples = [
        (lookup, option['name'], option['description'])
        for dictionary in dict_data
        for lookup in dictionary['lookups']
        for option in dictionary['lookups'][lookup]['options']
    ]

    data = {
        'lookup': [t[0] for t in triples],
        'name': [t[1] for t in triples],
        'description': [t[2] for t in triples],
    }

    df = pd.DataFrame(data=data)
    config.log_message("Done!")
    return df
Beispiel #13
0
def create_fields_json(dictionary_code):
	"""Build the field list for one dictionary plus the lookup structures its constraints reference.

	Returns (fields, lookups): a list of field dicts and a dict mapping each
	constraint name to its lookup JSON.
	"""
	config.log_message("-- Converting fields to JSON...")
	sheet = read_sheet('fields',{'dictionary_code':str,'name':str,'label':str,'type':str,'constraints':str,'description':str})
	sheet = sheet.replace(np.nan,"null")

	fields_arr = []
	constraints_dict = {}
	for row in sheet.to_dict(orient="records"):
		if row['dictionary_code'] != dictionary_code:
			continue
		fields_arr.append({
			'name': row['name'],
			'label': row['label'],
			'type': row['type'],
			'constraints': row['constraints'],
			'description': row['description'],
		})
		# Remember which field type each named constraint belongs to.
		if row['constraints'] != 'null':
			constraints_dict[row['constraints']] = row['type']

	lookups_dict = {
		constraint: create_lookups_json(constraint, field_type)
		for constraint, field_type in constraints_dict.items()
	}
	return fields_arr, lookups_dict
def parse_feed():
    """Fetch all configured feeds, extract movie torrents, filter them, and send the report.

    Builds two dicts: accepted movies (normalized title -> list of torrent
    property dicts) and discarded torrents (torrent title -> properties with a
    'discard' reason), then hands both to mail_utils for formatting/delivery.

    Ported to Python 3: dict.has_key()/iteritems() were removed; '//' keeps the
    original integer-MB division semantics.
    """
    list_movie = dict()
    list_movie_discarded = dict()
    execution_log = StringIO()
    first_print = True

    for feed_url in feed_urls:
        results = parse(feed_url)
        for entry in results['entries']:

            torrent_title = entry['title'].strip()

            # Depending on feed source, link to torrent html page might be added. We're only interested in the link
            # to the torrent file itself, that should lie at the end of the list
            torrent_file_url = entry['links'][-1]['href']

            # Delay console prints if user is prompted for configuration
            if config.status == 'init':
                execution_log.write("Processing : " + torrent_title + "\n")
            elif config.status == 'crash':
                sys.exit()
            else:
                if first_print:
                    # Flush everything buffered while the user was being prompted.
                    previous_logs = execution_log.getvalue()
                    execution_log.close()

                    if len(previous_logs) > 0:
                        for line in previous_logs.splitlines():
                            config.log_message(line)
                    first_print = False

                config.log_message("Processing " + torrent_title + "\n")

            # Walk the torrent name and collect key/value properties from it.
            pos = 0
            properties = dict()
            has_title = False
            while pos < len(torrent_title) - 1:
                data, pos, has_title = analyze_filename_content(torrent_title, pos, has_title)
                key = data[0].lower()
                value = re.sub(r'_|\.', r' ', data[1])
                if key in properties:
                    # Only multi-valued keys accumulate; other keys keep their first value.
                    if key in ['misc', 'tag', 'lan']:
                        properties[key] += u" " + value.lstrip().strip()
                else:
                    properties[key] = value.lstrip().strip()

            # Python 3: has_key() was removed, use the 'in' operator.
            if "summary" in entry:
                summary = entry['summary']

                pos = 0
                tmp_dict = dict()
                while pos < len(summary) - 1:
                    data, pos = analyze_summary_content(summary, pos)
                    if data != ():
                        tmp_dict[data[0].lower()] = data[1].strip()

                if 'rating' in tmp_dict and 'title' in tmp_dict and 'year' in tmp_dict:
                    # A torrent whose summary has all 3 information above is considered reliable. Consider these data
                    # rather than data extracted from torrent's name
                    for key, value in tmp_dict.items():
                        properties[key] = value

            if 'title' in properties:
                if 'rip' in properties:
                    if 'lan' not in properties or re.search(r'hindi|punjabi', properties['lan'].lower()) is None:
                        key = comparing_title(properties['title'])
                        if key not in list_movie:
                            if torrent_title not in list_movie_discarded:
                                get_imdb_info(properties)

                                if properties['trust_imdb'] and properties['rating'] < 6.5:
                                    properties['discard'] = 'Bad IMDB rating : ' \
                                                            + str(properties['rating']) \
                                                            + ' - ' \
                                                            + properties['imdb_url']
                            else:
                                properties['discard'] = 'Dummy text not used'
                    else:
                        properties['discard'] = 'Hindi movie'
                else:
                    properties['discard'] = 'Not a rip'
            else:
                properties['discard'] = 'No title found in torrent\'s name'
                properties['title'] = torrent_title

            properties['torrent_title'] = torrent_title
            properties['torrent_file_url'] = torrent_file_url

            # Depending on feed source, size of torrent content might be stored in different keys
            try:
                byte_length = int(entry['torrent_contentlength'])
            except (KeyError, ValueError, TypeError):
                byte_length = 0
            try:
                byte_length = int(entry['contentlength']) if byte_length == 0 else byte_length
            except (KeyError, ValueError, TypeError):
                byte_length = 0
            try:
                byte_length = int(entry['size']) if byte_length == 0 else byte_length
            except (KeyError, ValueError, TypeError):
                byte_length = 0

            if byte_length != 0:
                # '//' preserves the original Python 2 integer division for MB.
                mb = byte_length // (1024 * 1024)
                if mb > 1024:
                    gb = round(mb / float(1024), 2)
                    properties['size'] = (str(gb), 'GB')
                else:
                    properties['size'] = (str(mb), 'MB')
            else:
                properties['size'] = None

            if 'discard' not in properties:
                key = comparing_title(properties['title'])

                if key not in list_movie:
                    list_movie[key] = []
                list_movie[key].append(properties)
            else:
                if torrent_title not in list_movie_discarded:
                    list_movie_discarded[torrent_title] = properties

    # Block until the configuration prompt (if any) is resolved.
    log_to_print = False
    while config.status == 'init':
        log_to_print = True
        sleep(1)

    if config.status == 'ok':
        if log_to_print:
            previous_logs = execution_log.getvalue()
            execution_log.close()

            if len(previous_logs) > 0:
                for line in previous_logs.splitlines():
                    config.log_message(line)

    else:
        sys.exit()

    html_content, text_content = mail_utils.format_report(list_movie, list_movie_discarded)
    mail_utils.process_report(text_content, html_content)