Ejemplo n.º 1
0
    def play(self, track=None, context_uri=None, uris=None, device=None):
        """Send a play request to the ``me/player/play`` endpoint.

        One of three request shapes is built, checked in this order: an
        explicit collection of uris, a context uri, or a single track uri.
        When none applies the request body is left empty.

        Args:
            track (str, int): The track uri or position.
            context_uri (str): The context uri.
            uris (iter): Collection of uris to play.
            device (Device): A device to play.
        """
        body = {}

        if uris:
            # Explicit uri collection; an integer track picks the start index.
            body['uris'] = uris
            if common.is_int(track):
                body["offset"] = {"position": track}
        elif context_uri:
            # Play inside a context; the track may be a position or a uri.
            body["context_uri"] = context_uri

            offset_kind = None
            if common.is_int(track):
                offset_kind = "position"
            elif isinstance(track, basestring):
                offset_kind = "uri"
            if offset_kind is not None:
                body["offset"] = {offset_kind: track}
        elif track is not None:
            # No context given: only a plain track uri is played directly.
            if isinstance(track, basestring):
                if track.startswith("spotify:track"):
                    body['uris'] = [track]

        query = {}
        if device:
            device_id = device['id']
            if device_id:
                query["device_id"] = device_id

        self.put_api_v1("me/player/play", query, body)
Ejemplo n.º 2
0
def read_variants(variants, transcript_id, transcript_vars):
    """Collect the variants annotated on one transcript, keyed by position.

    Variants whose filter is not in VALID_FILTERS are ignored. For every VEP
    annotation whose 'Feature' equals transcript_id and whose worst
    consequence is in VALID_CSQS, a Variant is built and stored in
    transcript_vars under its integer protein position. Annotations whose
    position cannot be read as an integer are dropped.

    :param variants: iterable of variant dicts
    :param transcript_id: transcript identifier to match against 'Feature'
    :param transcript_vars: dict that is filled in place
    :return: the same transcript_vars dict
    """
    for record in variants:
        # Skip non-pass quality variants
        if record['filter'] not in VALID_FILTERS:
            continue

        record_id = record['variant_id']
        for annotation in record['vep_annotations']:
            if annotation['Feature'] != transcript_id:
                continue

            consequence = worst_csq_from_csq(annotation['Consequence'])
            # Only consequences listed in VALID_CSQS are analysed
            if consequence not in VALID_CSQS:
                continue

            entry = Variant()
            entry.variant_id = record_id
            entry.csq = consequence
            entry.xpos = record['xpos']

            # A range such as "12-15" is reduced to its start position
            position = annotation['Protein_position']
            if '-' in position:
                position = position.split('-')[0]

            # Keep the entry only when the position is a usable integer
            if is_int(position):
                entry.protein_pos = int(position)
                transcript_vars[int(position)] = entry
    return transcript_vars
Ejemplo n.º 3
0
    def exe_menu_0_3_1(self):
        """
        Box plot menu.

        Asks for a start year and an end year (each between 1996 and the
        current year), swaps them if they were given in reverse order,
        generates the box plot and then goes back to menu 0_3.

        Entering 'c' at either prompt cancels: menu 0_3 is shown and this
        menu ends when it returns, instead of asking for the year again.
        Any exception is reported through self.logger.error.
        :return: None
        """

        def ask_year(prompt):
            # Re-prompt until a valid year is typed; None means "cancel".
            while True:
                answer = input(prompt).lower()
                if is_int(answer
                          ) and 1996 <= int(answer) <= date.today().year:
                    return int(answer)
                if answer == 'c':
                    return None
                print(self.invalid_input_str)

        try:
            print(self.cut_off)
            print(
                'You are trying to generate a BOX PLOT between a year range:')

            start_year = ask_year(
                'Enter the start year[from 1996 to now, c for Cancel]: ')
            if start_year is None:
                self.exe_menu_0_3()
                return

            end_year = ask_year(
                'Enter the end year[from 1996 to now, c for Cancel]: ')
            if end_year is None:
                self.exe_menu_0_3()
                return

            # Accept the range in either order.
            if start_year > end_year:
                start_year, end_year = end_year, start_year

            self.generate_box_plot(start_year, end_year)
            self.exe_menu_0_3()
        except Exception as e:
            self.logger.error(e)
Ejemplo n.º 4
0
 def _check_int_seq(self,obj,mx_int):
     """Check obj is an int below mx_int, or a nested sequence of such.

     Returns 1 as soon as an offending element is found, otherwise 0.
     """
     if not common.is_sequence(obj):
         # Leaf value: must pass common.is_int and stay below mx_int.
         acceptable = common.is_int(obj) and not obj>=mx_int
         return 0 if acceptable else 1
     # Sequence: recurse into the items, stopping at the first failure.
     if any(self._check_int_seq(item,mx_int) for item in obj):
         return 1
     return 0
Ejemplo n.º 5
0
 def _check_int_seq(self,obj,mx_int):
     """Check obj is an int below mx_int, or a nested sequence of such.

     Returns 1 as soon as an offending element is found, otherwise 0.
     """
     if not common.is_sequence(obj):
         # Leaf value: must pass common.is_int and stay below mx_int.
         acceptable = common.is_int(obj) and not obj>=mx_int
         return 0 if acceptable else 1
     # Sequence: recurse into the items, stopping at the first failure.
     if any(self._check_int_seq(item,mx_int) for item in obj):
         return 1
     return 0
Ejemplo n.º 6
0
 def askBillEntry(self):
     topic = ''
     programs = self.programs['Discretionary'].keys()
     while topic not in programs:
         print 'available programs: ', programs
         topic = raw_input('Enter topic to change: ')
     degree = None
     while not is_int(degree):  # (-10 < degree < 10):
         degree = int(raw_input('Enter how much to change: '))
     return topic, degree
Ejemplo n.º 7
0
    def exe_menu_0_3_2(self):
        """
        Line plot menu.

        Asks for a year (between 1996 and the current year) and a month
        (1-12), generates the line plot for that month and then goes back
        to menu 0_3.

        Entering 'c' at either prompt cancels: menu 0_3 is shown and this
        menu ends when it returns, instead of asking for the value again.
        Any exception is reported through self.logger.error.
        :return: None
        """

        def ask_number(prompt, lowest, highest):
            # Re-prompt until an integer in [lowest, highest] is typed;
            # None means "cancel".
            while True:
                answer = input(prompt).lower()
                if is_int(answer) and lowest <= int(answer) <= highest:
                    return int(answer)
                if answer == 'c':
                    return None
                print(self.invalid_input_str)

        try:
            print(self.cut_off)
            print(
                'You are trying to generate a LINE PLOT for a specific month:')

            specific_year = ask_number(
                'Enter the year[from 1996 to now, c for Cancel]: ', 1996,
                date.today().year)
            if specific_year is None:
                self.exe_menu_0_3()
                return

            specific_month = ask_number(
                'Enter the month[1-12, c for Cancel]: ', 1, 12)
            if specific_month is None:
                self.exe_menu_0_3()
                return

            self.generate_line_plot(specific_year, specific_month)
            self.exe_menu_0_3()
        except Exception as e:
            self.logger.error(e)
Ejemplo n.º 8
0
    def askPrefEntry(self):
        topic = ''
        prefs = self.itbounds
        while topic not in prefs:
            print 'available prefs: ', prefs.keys()
            topic = raw_input('Enter prefs to change: ')
        degree = None
        while not is_int(degree):  # (-10 < degree < 10):
            degree = int(raw_input('Enter how much to change: '))

        spin()
        return topic, degree
Ejemplo n.º 9
0
 def _check_dimensions(self):
     """Validate self.dimensions and, if present, the length of self.points.

     The first three dimensions must pass common.is_int and be positive.
     When the object has a points attribute, its length must equal the
     product of all dimensions. The first problem found is reported through
     self.error and 1 is returned; 0 means everything checked out.
     """
     for axis in range(3):
         size = self.dimensions[axis]
         if not common.is_int(size):
             self.error('dimensions[%s] must be int but got %s'%(axis,type(size)))
             return 1
         if size<=0:
             self.error('dimensions[%s] must be positive int but got %s'%(axis,size))
             return 1
     if not hasattr(self,'points'):
         return 0
     # Expected point count is the product of all dimension sizes.
     expected = 1
     for size in self.dimensions:
         expected = expected*size
     if len(self.points)!=expected:
         self.error('mismatch of points length (%s) and dimensions size (%s)'%(len(self.points),expected))
         return 1
     return 0
Ejemplo n.º 10
0
 def _check_dimensions(self):
     """Validate self.dimensions and, if present, the length of self.points.

     The first three dimensions must pass common.is_int and be positive.
     When the object has a points attribute, its length must equal the
     product of all dimensions. The first problem found is reported through
     self.error and 1 is returned; 0 means everything checked out.
     """
     for axis in range(3):
         size = self.dimensions[axis]
         if not common.is_int(size):
             self.error('dimensions[%s] must be int but got %s'%(axis,type(size)))
             return 1
         if size<=0:
             self.error('dimensions[%s] must be positive int but got %s'%(axis,size))
             return 1
     if not hasattr(self,'points'):
         return 0
     # Expected point count is the product of all dimension sizes.
     expected = 1
     for size in self.dimensions:
         expected = expected*size
     if len(self.points)!=expected:
         self.error('mismatch of points length (%s) and dimensions size (%s)'%(len(self.points),expected))
         return 1
     return 0
Ejemplo n.º 11
0
    def _read_csv(self, key_column_name):
        path, file = os.path.split(self.path_to_csv)
        print 'Reading file "' + file + '"...'
        input_file = open(self.path_to_csv, 'rt')
        reader = csv.reader(input_file, delimiter=self.delimiter)

        self.headers = next(reader)
        self.column_num = len(self.headers)
        self.row_num = len(open(self.path_to_csv).readlines())

        if self.auto_detect_types:
            int_columns = [True] * len(self.headers)
            float_columns = [True] * len(self.headers)

        line_number = 0
        bar = progressbar.ProgressBar(maxval=1.0).start()
        for row in reader:

            for x in range(0, self.column_num):
                if self.auto_detect_types:
                    if int_columns[x]:
                        int_columns[x] = is_int(row[x])
                    if float_columns[x]:
                        float_columns[x] = is_float(row[x])

            row_dict = self.__row_to_dict(row)
            self.data.append(row_dict)

            if key_column_name:
                self.data_dict[row_dict[key_column_name]] = row_dict

            line_number += 1
            bar.update((line_number + 0.0) / self.row_num)
        bar.finish()

        if self.auto_detect_types:
            self.__format_data(int_columns, float_columns)
Ejemplo n.º 12
0
def compareToDB(wikiObj, odokObj, wpApi, dbReadSQL, verbose=False):
    '''
    compares a listobj to equiv obj in database
    this needs to deal with links and wikitext
    this should check clash parameter

    should return (diff, log)
            diff: dict of changes (if any) otherwise NONE
            log: list of issues encountered e.g. incorrecly formated wikitext
    TODO:
        proper log for coordinates
        only care about first X decimals in coordinate
        return needed/removed links
        fotnot-name
        should anything be done with:
            * odok:u'same_as'
            * odok:u'year_cmt'
    '''
    # wikiObj.keys() = [u'typ', u'artikel', u'titel', 'clash', u'inomhus', u'material', u'döljStadsdel', u'län', u'konstnär2',
    #                   u'konstnär3', u'konstnär4', u'konstnär5', u'konstnär6', u'konstnär7', u'konstnär8', u'konstnär9',
    #                   u'döljKommun', u'lat', u'plats', u'fotnot', u'fotnot2', u'fotnot3', u'id', u'kommun',
    #                   u'bild', u'stadsdel', u'commonscat', u'fri', u'konstnär', u'lon', u'beskrivning', u'årtal', u'id-länk',
    #                   u'fotnot-namn', u'fotnot2-namn', u'fotnot3-namn', u'aka', u'page', u'lista', u'header']
    # odokObj.keys() = [u'changed', u'official_url', u'ugc', u'image', u'county', u'year', u'owner', u'commons_cat', u'id',
    #                   u'wiki', u'list', u'descr', u'title', u'lon', u'source', u'same_as', u'type', u'muni', u'material', u'free',
    #                   u'district', u'address', u'lat', u'year_cmt', u'artist', u'inside', u'created', u'cmt', u'removed']

    log = ''
    if wikiObj['clash']:
        log += u'clash with another page. Don\'t know how to resolve this. Skipping: %s\n' % wikiObj['clash']
        return (None, log)

    ## Pre-processing
    # get some more things from ODOK
    odokObj[u'linked_artists'] = dbReadSQL.findArtist(wikiObj[u'id'])
    odokObj[u'artist_links'] = []
    for a in odokObj[u'linked_artists']:
        odokObj[u'artist_links'].append(a['wiki'])
    odokObj[u'aka'] = ''
    akas = dbReadSQL.findAkas(wikiObj[u'id'])
    if akas:
        odokObj[u'aka'] = []
        for a in akas:
            odokObj[u'aka'].append(a['aka'])
        odokObj[u'aka'] = ';'.join(odokObj[u'aka'])
    if odokObj[u'wiki']:
        odokObj[u'wiki'] = odokObj[u'wiki'].upper()

    # the following is inherited from the header
    if wikiObj[u'header'][u'tidigare']:
        wikiObj[u'tidigare'] = 1
    else:
        wikiObj[u'tidigare'] = 0

    # the following may be inherited from the header
    if wikiObj[u'döljKommun']:
        wikiObj[u'kommun'] = wikiObj[u'header'][u'kommun']
    if not wikiObj[u'län']:
        wikiObj[u'län'] = wikiObj[u'header'][u'län']
    if wikiObj[u'döljStadsdel'] and not wikiObj[u'stadsdel']:  # only overwrite non existant
        wikiObj[u'stadsdel'] = wikiObj[u'header'][u'stadsdel']
    # the following are limited in their values but need mapping from wiki to odok before comparison
    if wikiObj[u'fri'].lower() == 'nej':
        wikiObj[u'fri'] = 'unfree'
    if wikiObj[u'inomhus']:
        if wikiObj[u'inomhus'].lower() == 'ja':
            wikiObj[u'inomhus'] = 1
        elif wikiObj[u'inomhus'].lower() == 'nej':
            wikiObj[u'inomhus'] = 0
        else:
            log += 'unexpected value for inside-parameter (defaulting to no): %s\n' % wikiObj[u'inomhus']
            wikiObj[u'inomhus'] = 0
    else:
        wikiObj[u'inomhus'] = 0
    if wikiObj[u'kommun']:  # need muni code
        wikiObj[u'kommun'] = dataDict.muni_name2code[wikiObj[u'kommun']]
    if wikiObj[u'län'].startswith(u'SE-'):
        wikiObj[u'län'] = wikiObj[u'län'][len(u'SE-'):]
    if wikiObj[u'lat'] == '':
        wikiObj[u'lat'] = None
    else:
        if len(wikiObj[u'lat']) > 16:
            wikiObj[u'lat'] = '%.13f' % float(wikiObj[u'lat'])
        wikiObj[u'lat'] = wikiObj[u'lat'].strip('0')  # due to how numbers are stored
    if wikiObj[u'lon'] == '':
        wikiObj[u'lon'] = None
    else:
        if len(wikiObj[u'lon']) > 16:
            wikiObj[u'lon'] = '%.13f' % float(wikiObj[u'lon'])
        wikiObj[u'lon'] = wikiObj[u'lon'].strip('0')  # due to how numbers are stored
    if wikiObj[u'årtal'] == '':
        wikiObj[u'årtal'] = None

    # Deal with artists (does not deal with order of artists being changed):
    artist_param = [u'konstnär', u'konstnär2', u'konstnär3',
                    u'konstnär4', u'konstnär5', u'konstnär6',
                    u'konstnär7', u'konstnär8', u'konstnär9']
    wikiObj[u'artists'] = ''
    artists_links = {}
    for a in artist_param:
        if wikiObj[a]:
            (w_text, w_links) = unwiki(wikiObj[a])
            wikiObj[u'artists'] = u'%s%s;' % (wikiObj[u'artists'], w_text)
            if w_links:
                artists_links[w_text] = w_links[0]
    if wikiObj[u'artists']:
        wikiObj[u'artists'] = wikiObj[u'artists'][:-1]  # trim trailing ;

    ## dealing with links:
    links = artists_links.values()
    if wikiObj[u'artikel']:
        if u'#' in wikiObj[u'artikel']:
            log += u'link to section: %s\n' % wikiObj[u'artikel']
        else:
            links.append(wikiObj[u'artikel'])
    if links:
        links = wpApi.getPageInfo(links)
        for k, v in links.iteritems():
            if u'disambiguation' in v.keys():
                log += u'link to disambigpage: %s\n' % k
                links[k] = ''
            elif u'wikidata' in v.keys():
                links[k] = v[u'wikidata']
            else:
                links[k] = ''
    else:
        links = {}
    # Stick wikidata back into parameters
    if wikiObj[u'artikel']:
        if u'#' not in wikiObj[u'artikel']:
            wikiObj[u'artikel'] = links.pop(wikiObj[u'artikel'])
        else:
            wikiObj[u'artikel'] = ''
    wikiObj[u'artist_links'] = links.values()

    ## Main-process
    diff = {}
    # easy to compare {wiki:odok}
    trivial_params = {u'typ': u'type',
                      u'material': u'material',
                      u'id-länk': u'official_url',
                      u'fri': u'free',
                      u'inomhus': u'inside',
                      u'artists': u'artist',
                      u'årtal': u'year',
                      u'commonscat': u'commons_cat',
                      u'beskrivning': u'descr',
                      u'bild': u'image',
                      u'titel': u'title',
                      u'aka': u'aka',
                      u'artikel': u'wiki',
                      u'list': u'list',
                      u'plats': u'address',
                      u'län': u'county',
                      u'kommun': u'muni',
                      u'stadsdel': u'district',
                      u'tidigare': u'removed',
                      u'lat': u'lat',
                      u'lon': u'lon',
                      u'fotnot': u'cmt'}

    for k, v in trivial_params.iteritems():
        (w_text, w_links) = unwiki(wikiObj[k])
        if not (w_text == odokObj[v]):
            diff[v] = {'new': w_text, 'old': odokObj[v]}
            if verbose:
                print u'%s:"%s"    <--->   %s:"%s"' % (k, w_text, v, odokObj[v])

    ## Needing separate treatment
    # comparing artist_links: u'artist_links':u'artist_links'
    artist_diff = {'+': [], '-': []}
    artist_links = list(set(wikiObj[u'artist_links'])-set(odokObj[u'artist_links']))
    if artist_links and len(''.join(artist_links)) > 0:
        artist_diff['+'] = artist_links[:]  # slice to clone the list
    artist_links = list(set(odokObj[u'artist_links'])-set(wikiObj[u'artist_links']))
    if artist_links and len(''.join(artist_links)) > 0:
        artist_diff['-'] = artist_links[:]  # slice to clone the list
    # handler can only deal with new artists
    if len(artist_diff['-']) == 0 and len(artist_diff['+']) > 0:
        artIds = dbReadSQL.getArtistByWiki(artist_diff['+'])  # list of id:{'first_name', 'last_name', 'wiki', 'birth_date', 'death_date', 'birth_year', 'death_year'}
        newArtistLinks = []
        for k, v in artIds.iteritems():
            artist_diff['+'].remove(v['wiki'])
            newArtistLinks.append(k)
        if len(newArtistLinks) > 0:
            diff[u'artist_links'] = {'new': newArtistLinks, 'old': []}
    # output remaining to log
    for k, v in artist_diff.iteritems():
        if len(v) > 0:
            log += u'difference in artist links, linkdiff%s: %s\n' % (k, ';'.join(v))

    ## akas
    if 'aka' not in diff.keys():
        pass
    elif sorted(diff['aka']['new'].split(';')) == sorted(diff['aka']['old'].split(';')):
        del(diff['aka'])
    else:
        aka_diff = {'+': [], '-': []}
        aka_list = list(set(diff['aka']['new'].split(';'))-set(diff['aka']['old'].split(';')))
        if aka_list and len(''.join(aka_list)) > 0:
            aka_diff['+'] = aka_list[:]  # slice to clone the list
        aka_list = list(set(diff['aka']['old'].split(';'))-set(diff['aka']['new'].split(';')))
        if aka_list and len(''.join(aka_list)) > 0:
            aka_diff['-'] = aka_list[:]  # slice to clone the list
        # handler can only deal with new akas
        if len(aka_diff['-']) == 0 and len(aka_diff['+']) > 0:
            diff[u'aka_list'] = {'new': aka_diff['+'], 'old': []}
            del(aka_diff['+'])
        # output remaining to log
        for k, v in aka_diff.iteritems():
            if len(v) > 0:
                log += u'difference in akas, diff%s: %s\n' % (k, ';'.join(v))
        # remove these for now
        del(diff['aka'])

    ## Post-processing
    # fotnot-namn without fotnot - needs to look-up fotnot for o:cmt
    if wikiObj[u'fotnot-namn'] and not wikiObj[u'fotnot']:
        log += u'fotnot-namn so couldn\'t compare, fotnot-namn: %s\n' % wikiObj[u'fotnot-namn']
        if u'cmt' in diff.keys():
            del diff[u'cmt']

    # free defaults to unfree in wiki but not necessarily in db
    if 'free' in diff.keys() and diff['free']['new'] == '':
        if diff['free']['old'] == 'unfree':
            diff.pop('free')

    # Years which are not plain numbers cannot be sent to db
    if 'year' in diff.keys():
        if not common.is_int(diff['year']['new']):
            year = diff.pop('year')
            log += u'Non-integer year: %s\n' % year['new']

    # lat/lon reqires an extra touch as only decimal numbers and nones may be sent to db
    if 'lat' in diff.keys():
        if not diff['lat']['new']:
            # if new = None
            pass
        elif not common.is_number(diff['lat']['new']):
            lat = diff.pop('lat')
            log += u'Non-decimal lat: %s\n' % lat['new']
    if 'lon' in diff.keys():
        if not diff['lon']['new']:
            pass
        elif not common.is_number(diff['lon']['new']):
            lat = diff.pop('lon')
            log += u'Non-decimal lon: %s\n' % diff['lon']['new']

    # Basic validation of artist field:
    if 'artist' in diff.keys():
        # check that number of artists is the same
        if '[' in diff['artist']['old']:
            artist = diff.pop('artist')
            log += u'cannot deal with artists which include group affilitations: %s --> %s\n' % (artist['old'], artist['new'])
        elif (len(diff['artist']['old'].split(';')) != len(diff['artist']['new'].split(';'))) and (len(diff['artist']['old']) > 0):
            # if not the same number when there were originally some artists
            artist = diff.pop('artist')
            log += u'difference in number of artists: %s --> %s\n' % (artist['old'], artist['new'])

    # Unstripped refrences
    for k in diff.keys():
        if k in (u'official_url', u'inside', u'removed'):  # not strings or ok to have http
            continue
        if diff[k]['new'] and 'http' in diff[k]['new']:
            val = diff.pop(k)
            log += u'new value for %s seems to include a url: %s --> %s\n' % (k, val['old'], val['new'])

    return (diff, log)
Ejemplo n.º 13
0
    '''
    takes wikiformated text and returns unformated text with any links sent separately
    :parm wikitext: wikitext to be processed
    :return: (text, links)
            text: unformated text
            links: a list of any links found in the text
    '''
    if isinstance(wikitext, unicode):
        return common.extractAllLinks(wikitext, kill_tags=True)
    else:
        return wikitext, None


if __name__ == "__main__":
    import sys
    usage = '''Usage: python synking.py days
\tdays(optional): number of days back to search for changes (default 100)'''
    argv = sys.argv[1:]
    if len(argv) == 0:
        run()
    elif len(argv) == 1:
        if common.is_int(argv[0]):
            days = int(argv[0])
            print 'running for %d days' % days
            run(days=days)
        else:
            print usage
    else:
        print usage
# EoF
Ejemplo n.º 14
0
def compareToDB(wikiObj, odokObj, wpApi, dbReadSQL, verbose=False):
    '''
    compares a listobj to equiv obj in database
    this needs to deal with links and wikitext
    this should check clash parameter

    should return (diff, log)
            diff: dict of changes (if any) otherwise NONE
            log: list of issues encountered e.g. incorrecly formated wikitext
    TODO:
        proper log for coordinates
        only care about first X decimals in coordinate
        return needed/removed links
        fotnot-name
        should anything be done with:
            * odok:u'same_as'
            * odok:u'year_cmt'
    '''
    # wikiObj.keys() = [u'typ', u'artikel', u'titel', 'clash', u'inomhus', u'material', u'döljStadsdel', u'län', u'konstnär2',
    #                   u'konstnär3', u'konstnär4', u'konstnär5', u'konstnär6', u'konstnär7', u'konstnär8', u'konstnär9',
    #                   u'döljKommun', u'lat', u'plats', u'fotnot', u'fotnot2', u'fotnot3', u'id', u'kommun',
    #                   u'bild', u'stadsdel', u'commonscat', u'fri', u'konstnär', u'lon', u'beskrivning', u'årtal', u'id-länk',
    #                   u'fotnot-namn', u'fotnot2-namn', u'fotnot3-namn', u'aka', u'page', u'lista', u'header']
    # odokObj.keys() = [u'changed', u'official_url', u'ugc', u'image', u'county', u'year', u'owner', u'commons_cat', u'id',
    #                   u'wiki', u'list', u'descr', u'title', u'lon', u'source', u'same_as', u'type', u'muni', u'material', u'free',
    #                   u'district', u'address', u'lat', u'year_cmt', u'artist', u'inside', u'created', u'cmt', u'removed']

    log = ''
    if wikiObj['clash']:
        log += u'clash with another page. Don\'t know how to resolve this. Skipping: %s\n' % wikiObj['clash']
        return (None, log)

    ## Pre-processing
    # get some more things from ODOK
    odokObj[u'linked_artists'] = dbReadSQL.findArtist(wikiObj[u'id'])
    odokObj[u'artist_links'] = []
    for a in odokObj[u'linked_artists']:
        odokObj[u'artist_links'].append(a['wiki'])
    odokObj[u'aka'] = ''
    akas = dbReadSQL.findAkas(wikiObj[u'id'])
    if akas:
        odokObj[u'aka'] = []
        for a in akas:
            odokObj[u'aka'].append(a['aka'])
        odokObj[u'aka'] = ';'.join(odokObj[u'aka'])
    if odokObj[u'wiki']:
        odokObj[u'wiki'] = odokObj[u'wiki'].upper()

    # the following is inherited from the header
    if wikiObj[u'header'][u'tidigare']:
        wikiObj[u'tidigare'] = 1
    else:
        wikiObj[u'tidigare'] = 0

    # the following may be inherited from the header
    if wikiObj[u'döljKommun']:
        wikiObj[u'kommun'] = wikiObj[u'header'][u'kommun']
    if not wikiObj[u'län']:
        wikiObj[u'län'] = wikiObj[u'header'][u'län']
    if wikiObj[u'döljStadsdel'] and not wikiObj[u'stadsdel']:  # only overwrite non existant
        wikiObj[u'stadsdel'] = wikiObj[u'header'][u'stadsdel']
    # the following are limited in their values but need mapping from wiki to odok before comparison
    if wikiObj[u'fri'].lower() == 'nej':
        wikiObj[u'fri'] = 'unfree'
    if wikiObj[u'inomhus']:
        if wikiObj[u'inomhus'].lower() == 'ja':
            wikiObj[u'inomhus'] = 1
        elif wikiObj[u'inomhus'].lower() == 'nej':
            wikiObj[u'inomhus'] = 0
        else:
            log += 'unexpected value for inside-parameter (defaulting to no): %s\n' % wikiObj[u'inomhus']
            wikiObj[u'inomhus'] = 0
    else:
        wikiObj[u'inomhus'] = 0
    if wikiObj[u'kommun']:  # need muni code
        wikiObj[u'kommun'] = dataDict.muni_name2code[wikiObj[u'kommun']]
    if wikiObj[u'län'].startswith(u'SE-'):
        wikiObj[u'län'] = wikiObj[u'län'][len(u'SE-'):]
    if wikiObj[u'lat'] == '':
        wikiObj[u'lat'] = None
    else:
        if len(wikiObj[u'lat']) > 16:
            wikiObj[u'lat'] = '%.13f' % float(wikiObj[u'lat'])
        wikiObj[u'lat'] = wikiObj[u'lat'].strip('0')  # due to how numbers are stored
    if wikiObj[u'lon'] == '':
        wikiObj[u'lon'] = None
    else:
        if len(wikiObj[u'lon']) > 16:
            wikiObj[u'lon'] = '%.13f' % float(wikiObj[u'lon'])
        wikiObj[u'lon'] = wikiObj[u'lon'].strip('0')  # due to how numbers are stored
    if wikiObj[u'årtal'] == '':
        wikiObj[u'årtal'] = None

    # Deal with artists (does not deal with order of artists being changed):
    artist_param = [u'konstnär', u'konstnär2', u'konstnär3',
                    u'konstnär4', u'konstnär5', u'konstnär6',
                    u'konstnär7', u'konstnär8', u'konstnär9']
    wikiObj[u'artists'] = ''
    artists_links = {}
    for a in artist_param:
        if wikiObj[a]:
            (w_text, w_links) = unwiki(wikiObj[a])
            wikiObj[u'artists'] = u'%s%s;' % (wikiObj[u'artists'], w_text)
            if w_links:
                artists_links[w_text] = w_links[0]
    if wikiObj[u'artists']:
        wikiObj[u'artists'] = wikiObj[u'artists'][:-1]  # trim trailing ;

    ## dealing with links:
    links = artists_links.values()
    if wikiObj[u'artikel']:
        if u'#' in wikiObj[u'artikel']:
            log += u'link to section: %s\n' % wikiObj[u'artikel']
        else:
            links.append(wikiObj[u'artikel'])
    if links:
        links = wpApi.getPageInfo(links)
        for k, v in links.iteritems():
            if u'disambiguation' in v.keys():
                log += u'link to disambigpage: %s\n' % k
                links[k] = ''
            elif u'wikidata' in v.keys():
                links[k] = v[u'wikidata']
            else:
                links[k] = ''
    else:
        links = {}
    # Stick wikidata back into parameters
    if wikiObj[u'artikel']:
        if u'#' not in wikiObj[u'artikel']:
            wikiObj[u'artikel'] = links.pop(wikiObj[u'artikel'])
        else:
            wikiObj[u'artikel'] = ''
    wikiObj[u'artist_links'] = links.values()

    ## Main-process
    diff = {}
    # easy to compare {wiki:odok}
    trivial_params = {u'typ': u'type',
                      u'material': u'material',
                      u'id-länk': u'official_url',
                      u'fri': u'free',
                      u'inomhus': u'inside',
                      u'artists': u'artist',
                      u'årtal': u'year',
                      u'commonscat': u'commons_cat',
                      u'beskrivning': u'descr',
                      u'bild': u'image',
                      u'titel': u'title',
                      u'aka': u'aka',
                      u'artikel': u'wiki',
                      u'list': u'list',
                      u'plats': u'address',
                      u'län': u'county',
                      u'kommun': u'muni',
                      u'stadsdel': u'district',
                      u'tidigare': u'removed',
                      u'lat': u'lat',
                      u'lon': u'lon',
                      u'fotnot': u'cmt'}

    for k, v in trivial_params.iteritems():
        (w_text, w_links) = unwiki(wikiObj[k])
        if not (w_text == odokObj[v]):
            diff[v] = {'new': w_text, 'old': odokObj[v]}
            if verbose:
                print u'%s:"%s"    <--->   %s:"%s"' % (k, w_text, v, odokObj[v])

    ## Needing separate treatment
    # comparing artist_links: u'artist_links':u'artist_links'
    artist_diff = {'+': [], '-': []}
    artist_links = list(set(wikiObj[u'artist_links'])-set(odokObj[u'artist_links']))
    if artist_links and len(''.join(artist_links)) > 0:
        artist_diff['+'] = artist_links[:]  # slice to clone the list
    artist_links = list(set(odokObj[u'artist_links'])-set(wikiObj[u'artist_links']))
    if artist_links and len(''.join(artist_links)) > 0:
        artist_diff['-'] = artist_links[:]  # slice to clone the list
    # handler can only deal with new artists
    if len(artist_diff['-']) == 0 and len(artist_diff['+']) > 0:
        artIds = dbReadSQL.getArtistByWiki(artist_diff['+'])  # list of id:{'first_name', 'last_name', 'wiki', 'birth_date', 'death_date', 'birth_year', 'death_year'}
        newArtistLinks = []
        for k, v in artIds.iteritems():
            artist_diff['+'].remove(v['wiki'])
            newArtistLinks.append(k)
        if len(newArtistLinks) > 0:
            diff[u'artist_links'] = {'new': newArtistLinks, 'old': []}
    # output remaining to log
    for k, v in artist_diff.iteritems():
        if len(v) > 0:
            log += u'difference in artist links, linkdiff%s: %s\n' % (k, ';'.join(v))

    ## akas
    if 'aka' not in diff.keys():
        pass
    elif sorted(diff['aka']['new'].split(';')) == sorted(diff['aka']['old'].split(';')):
        del(diff['aka'])
    else:
        aka_diff = {'+': [], '-': []}
        aka_list = list(set(diff['aka']['new'].split(';'))-set(diff['aka']['old'].split(';')))
        if aka_list and len(''.join(aka_list)) > 0:
            aka_diff['+'] = aka_list[:]  # slice to clone the list
        aka_list = list(set(diff['aka']['old'].split(';'))-set(diff['aka']['new'].split(';')))
        if aka_list and len(''.join(aka_list)) > 0:
            aka_diff['-'] = aka_list[:]  # slice to clone the list
        # handler can only deal with new akas
        if len(aka_diff['-']) == 0 and len(aka_diff['+']) > 0:
            diff[u'aka_list'] = {'new': aka_diff['+'], 'old': []}
            del(aka_diff['+'])
        # output remaining to log
        for k, v in aka_diff.iteritems():
            if len(v) > 0:
                log += u'difference in akas, diff%s: %s\n' % (k, ';'.join(v))
        # remove these for now
        del(diff['aka'])

    ## Post-processing
    # fotnot-namn without fotnot - needs to look-up fotnot for o:cmt
    if wikiObj[u'fotnot-namn'] and not wikiObj[u'fotnot']:
        log += u'fotnot-namn so couldn\'t compare, fotnot-namn: %s\n' % wikiObj[u'fotnot-namn']
        if u'cmt' in diff.keys():
            del diff[u'cmt']

    # free defaults to unfree in wiki but not necessarily in db
    if 'free' in diff.keys() and diff['free']['new'] == '':
        if diff['free']['old'] == 'unfree':
            diff.pop('free')

    # Years which are not plain numbers cannot be sent to db
    if 'year' in diff.keys():
        if not common.is_int(diff['year']['new']):
            year = diff.pop('year')
            log += u'Non-integer year: %s\n' % year['new']

    # lat/lon reqires an extra touch as only decimal numbers and nones may be sent to db
    if 'lat' in diff.keys():
        if not diff['lat']['new']:
            # if new = None
            pass
        elif not common.is_number(diff['lat']['new']):
            lat = diff.pop('lat')
            log += u'Non-decimal lat: %s\n' % lat['new']
    if 'lon' in diff.keys():
        if not diff['lon']['new']:
            pass
        elif not common.is_number(diff['lon']['new']):
            lat = diff.pop('lon')
            log += u'Non-decimal lon: %s\n' % diff['lon']['new']

    # Basic validation of artist field:
    if 'artist' in diff.keys():
        # check that number of artists is the same
        if '[' in diff['artist']['old']:
            artist = diff.pop('artist')
            log += u'cannot deal with artists which include group affilitations: %s --> %s\n' % (artist['old'], artist['new'])
        elif (len(diff['artist']['old'].split(';')) != len(diff['artist']['new'].split(';'))) and (len(diff['artist']['old']) > 0):
            # if not the same number when there were originally some artists
            artist = diff.pop('artist')
            log += u'difference in number of artists: %s --> %s\n' % (artist['old'], artist['new'])

    # Unstripped refrences
    for k in diff.keys():
        if k in (u'official_url', u'inside', u'removed'):  # not strings or ok to have http
            continue
        if diff[k]['new'] and 'http' in diff[k]['new']:
            val = diff.pop(k)
            log += u'new value for %s seems to include a url: %s --> %s\n' % (k, val['old'], val['new'])

    return (diff, log)
Ejemplo n.º 15
0
    '''
    takes wikiformated text and returns unformated text with any links sent separately
    :parm wikitext: wikitext to be processed
    :return: (text, links)
            text: unformated text
            links: a list of any links found in the text
    '''
    if isinstance(wikitext, unicode):
        return common.extractAllLinks(wikitext, kill_tags=True)
    else:
        return wikitext, None


if __name__ == "__main__":
    import sys
    usage = '''Usage: python synking.py days
\tdays(optional): number of days back to search for changes (default 100)'''
    argv = sys.argv[1:]
    if len(argv) == 0:
        run()
    elif len(argv) == 1:
        if common.is_int(argv[0]):
            days = int(argv[0])
            print 'running for %d days' % days
            run(days=days)
        else:
            print usage
    else:
        print usage
# EoF