Beispiel #1
0
class QUAL(object):
    """
    Enumerated quality ratings together with their text representations.

    Every rating is an integer constant (``None`` for ``UNKNOWN``) that has a
    lowercase ``code`` string and a capitalized ``nice`` display string.  The
    ``*_TO_*`` tables convert between the three representations.
    """
    EXCELLENT, GOOD, OK, POOR, JUNK = 5, 4, 3, 2, 1
    UNKNOWN = None

    # One (value, code, nice) row per rating.  The row order is the
    # insertion order of the two primary tables built from it.
    _rows = (
        (EXCELLENT, 'excellent', 'Excellent'),
        (GOOD, 'good', 'Good'),
        (OK, 'ok', 'OK'),
        (POOR, 'poor', 'Poor'),
        (JUNK, 'junk', 'Junk'),
        (UNKNOWN, 'unspecified', 'Unspecified'),
    )

    # Primary tables: rating value -> code / nice text.
    INT_TO_CODE = ut.odict((value, code) for value, code, _nice in _rows)
    INT_TO_NICE = ut.odict((value, nice) for value, _code, nice in _rows)
    # The row table is scratch data only; remove it so that the class exposes
    # exactly the constants and lookup tables.
    del _rows

    # Derived tables between the remaining pairs of representations.
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)
Beispiel #2
0
class CONFIDENCE(object):
    """
    Enumerated confidence levels together with their text representations.

    Every level is an integer constant (``None`` for ``UNKNOWN``) that has a
    lowercase ``code`` string and a ``nice`` display string.  The ``*_TO_*``
    tables convert between the three representations.
    """
    UNKNOWN = None
    GUESSING, NOT_SURE, PRETTY_SURE, ABSOLUTELY_SURE = 1, 2, 3, 4

    # One (value, code, nice) row per level, most confident first.  The row
    # order is the insertion order of the two primary tables built from it.
    _rows = (
        (ABSOLUTELY_SURE, 'absolutely_sure', 'Doubtless'),
        (PRETTY_SURE, 'pretty_sure', 'Sure'),
        (NOT_SURE, 'not_sure', 'Unsure'),
        (GUESSING, 'guessing', 'Guessing'),
        (UNKNOWN, 'unspecified', 'Unspecified'),
    )

    # Primary tables: confidence value -> code / nice text.
    INT_TO_CODE = ut.odict((value, code) for value, code, _nice in _rows)
    INT_TO_NICE = ut.odict((value, nice) for value, _code, nice in _rows)
    # The row table is scratch data only; remove it so that the class exposes
    # exactly the constants and lookup tables.
    del _rows

    # Derived tables between the remaining pairs of representations.
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)
Beispiel #3
0
class META_DECISION(object):  # NOQA
    """
    Enumerated meta-decision values with their code and display texts.

    Notes:
        null: we do not have a meta decision (unreviewed)
        same: we know this is the same animal through non-visual means
        diff: we know this is a different animal through non-visual means

        NOTE(review): the example below reads ``META_DECISION.CODE`` and
        ``META_DECISION.NICE``, which are not defined in this class body;
        presumably a metaclass or decorator that is not visible here adds
        them -- confirm before relying on the doctest.

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.constants import *  # NOQA
        >>> assert hasattr(META_DECISION, 'CODE')
        >>> assert hasattr(META_DECISION, 'NICE')
        >>> code1 = META_DECISION.INT_TO_CODE[META_DECISION.NULL]
        >>> code2 = META_DECISION.CODE.NULL
        >>> assert code1 == code2
        >>> nice1 = META_DECISION.INT_TO_NICE[META_DECISION.NULL]
        >>> nice2 = META_DECISION.NICE.NULL
        >>> assert nice1 == nice2
    """

    NULL = None
    DIFF, SAME = 0, 1

    # One (value, code, nice) row per decision.  The row order is the
    # insertion order of the two primary tables built from it.
    _rows = (
        (NULL, 'null', 'NULL'),
        (DIFF, 'diff', 'Different'),
        (SAME, 'same', 'Same'),
    )

    # Primary tables: decision value -> code / nice text.
    INT_TO_CODE = ut.odict((value, code) for value, code, _nice in _rows)
    INT_TO_NICE = ut.odict((value, nice) for value, _code, nice in _rows)
    # The row table is scratch data only; remove it so that the class exposes
    # exactly the constants and lookup tables.
    del _rows

    # Derived tables between the remaining pairs of representations.
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)
Beispiel #4
0
class EVIDENCE_DECISION(object):  # NOQA
    """
    Enumerated types of review codes and texts.

    Notes:
        unreviewed: Not compared yet.
        nomatch: Visually comparable and different.
        match: Visually comparable and the same.
        notcomp: Not comparable means it is actually impossible to determine.
        unknown: means that it was reviewed, but we just can't figure it out.
    """

    UNREVIEWED = None
    NEGATIVE, POSITIVE, INCOMPARABLE, UNKNOWN = 0, 1, 2, 3

    # One (value, code, nice) row per decision.  The row order is the
    # insertion order of the two primary tables built from it.
    _rows = (
        (POSITIVE, 'match', 'Positive'),
        (NEGATIVE, 'nomatch', 'Negative'),
        (INCOMPARABLE, 'notcomp', 'Incomparable'),
        (UNKNOWN, 'unknown', 'Unknown'),
        (UNREVIEWED, 'unreviewed', 'Unreviewed'),
    )

    # Primary tables: decision value -> code / nice text.
    INT_TO_CODE = ut.odict((value, code) for value, code, _nice in _rows)
    INT_TO_NICE = ut.odict((value, nice) for value, _code, nice in _rows)
    # The row table is scratch data only; remove it so that the class exposes
    # exactly the constants and lookup tables.
    del _rows

    # Derived tables between the remaining pairs of representations.
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)

    # Second name bound to the very same table object as CODE_TO_INT.
    MATCH_CODE = CODE_TO_INT
Beispiel #5
0
class VIEW(object):
    """
    Categorical viewpoint using the faces of a Rhombicuboctahedron.

    Each of the 26 known viewpoints is an integer constant whose name is built
    from the letters U/D (up/down), F/B (front/back) and L/R (left/right);
    ``UNKNOWN`` is ``None``.  ``INT_TO_CODE`` and ``INT_TO_NICE`` give the
    lowercase code and the hyphenated display text of every viewpoint, and the
    remaining ``*_TO_*`` tables are derived from those two.

    ``DIST`` maps a pair of viewpoint values to an integer distance between 0
    and 4, or to ``None`` when either member of the pair is ``UNKNOWN``.  For
    known viewpoints only one orientation of each pair is written out; the
    loop at the end of the class body adds the mirrored key, so lookups work
    with the pair in either order.

    References:
        https://en.wikipedia.org/wiki/Rhombicuboctahedron
    """

    UNKNOWN = None
    # Viewpoints without an up/down component
    R = 1
    FR = 2
    F = 3
    FL = 4
    L = 5
    BL = 6
    B = 7
    BR = 8

    # Viewpoints with an "up" component
    U = 9
    UF = 10
    UB = 11
    UL = 12
    UR = 13
    UFL = 14
    UFR = 15
    UBL = 16
    UBR = 17

    # Viewpoints with a "down" component
    D = 18
    DF = 19
    DB = 20
    DL = 21
    DR = 22
    DFL = 23
    DFR = 24
    DBL = 25
    DBR = 26

    # Viewpoint value -> lowercase code string
    INT_TO_CODE = ut.odict([
        (UNKNOWN, 'unknown'),
        (R, 'right'),
        (FR, 'frontright'),
        (F, 'front'),
        (FL, 'frontleft'),
        (L, 'left'),
        (BL, 'backleft'),
        (B, 'back'),
        (BR, 'backright'),
        (U, 'up'),
        (UF, 'upfront'),
        (UB, 'upback'),
        (UL, 'upleft'),
        (UR, 'upright'),
        (UFL, 'upfrontleft'),
        (UFR, 'upfrontright'),
        (UBL, 'upbackleft'),
        (UBR, 'upbackright'),
        (D, 'down'),
        (DF, 'downfront'),
        (DB, 'downback'),
        (DL, 'downleft'),
        (DR, 'downright'),
        (DFL, 'downfrontleft'),
        (DFR, 'downfrontright'),
        (DBL, 'downbackleft'),
        (DBR, 'downbackright'),
    ])

    # Viewpoint value -> display text
    INT_TO_NICE = ut.odict([
        (UNKNOWN, 'Unknown'),
        (R, 'Right'),
        (FR, 'Front-Right'),
        (F, 'Front'),
        (FL, 'Front-Left'),
        (L, 'Left'),
        (BL, 'Back-Left'),
        (B, 'Back'),
        (BR, 'Back-Right'),
        (U, 'Up'),
        (UF, 'Up-Front'),
        (UB, 'Up-Back'),
        (UL, 'Up-Left'),
        (UR, 'Up-Right'),
        (UFL, 'Up-Front-Left'),
        (UFR, 'Up-Front-Right'),
        (UBL, 'Up-Back-Left'),
        (UBR, 'Up-Back-Right'),
        (D, 'Down'),
        (DF, 'Down-Front'),
        (DB, 'Down-Back'),
        (DL, 'Down-Left'),
        (DR, 'Down-Right'),
        (DFL, 'Down-Front-Left'),
        (DFR, 'Down-Front-Right'),
        (DBL, 'Down-Back-Left'),
        (DBR, 'Down-Back-Right'),
    ])

    # Tables derived from the two primary tables above
    CODE_TO_NICE = ut.map_keys(INT_TO_CODE, INT_TO_NICE)
    CODE_TO_INT = ut.invert_dict(INT_TO_CODE)
    NICE_TO_CODE = ut.invert_dict(CODE_TO_NICE)
    NICE_TO_INT = ut.invert_dict(INT_TO_NICE)

    # Hand-written distance table keyed by (viewpoint, viewpoint) pairs.
    # Pairs of known viewpoints appear in one orientation only; the mirrored
    # keys are added by the loop after the literal.
    DIST = {
        # DIST 0 PAIRS
        (B, B): 0,
        (BL, BL): 0,
        (BR, BR): 0,
        (D, D): 0,
        (DB, DB): 0,
        (DBL, DBL): 0,
        (DBR, DBR): 0,
        (DF, DF): 0,
        (DFL, DFL): 0,
        (DFR, DFR): 0,
        (DL, DL): 0,
        (DR, DR): 0,
        (F, F): 0,
        (FL, FL): 0,
        (FR, FR): 0,
        (L, L): 0,
        (R, R): 0,
        (U, U): 0,
        (UB, UB): 0,
        (UBL, UBL): 0,
        (UBR, UBR): 0,
        (UF, UF): 0,
        (UFL, UFL): 0,
        (UFR, UFR): 0,
        (UL, UL): 0,
        (UR, UR): 0,
        # DIST 1 PAIRS
        (B, BL): 1,
        (B, BR): 1,
        (B, DB): 1,
        (B, DBL): 1,
        (B, DBR): 1,
        (B, UB): 1,
        (B, UBL): 1,
        (B, UBR): 1,
        (BL, DBL): 1,
        (BL, L): 1,
        (BL, UBL): 1,
        (BR, DBR): 1,
        (BR, R): 1,
        (BR, UBR): 1,
        (D, DB): 1,
        (D, DBL): 1,
        (D, DBR): 1,
        (D, DF): 1,
        (D, DFL): 1,
        (D, DFR): 1,
        (D, DL): 1,
        (D, DR): 1,
        (DB, DBL): 1,
        (DB, DBR): 1,
        (DBL, DL): 1,
        (DBL, L): 1,
        (DBR, DR): 1,
        (DBR, R): 1,
        (DF, DFL): 1,
        (DF, DFR): 1,
        (DF, F): 1,
        (DFL, DL): 1,
        (DFL, F): 1,
        (DFL, FL): 1,
        (DFL, L): 1,
        (DFR, DR): 1,
        (DFR, F): 1,
        (DFR, FR): 1,
        (DFR, R): 1,
        (DL, L): 1,
        (DR, R): 1,
        (F, FL): 1,
        (F, FR): 1,
        (F, UF): 1,
        (F, UFL): 1,
        (F, UFR): 1,
        (FL, L): 1,
        (FL, UFL): 1,
        (FR, R): 1,
        (FR, UFR): 1,
        (L, UBL): 1,
        (L, UFL): 1,
        (L, UL): 1,
        (R, UBR): 1,
        (R, UFR): 1,
        (R, UR): 1,
        (U, UB): 1,
        (U, UBL): 1,
        (U, UBR): 1,
        (U, UF): 1,
        (U, UFL): 1,
        (U, UFR): 1,
        (U, UL): 1,
        (U, UR): 1,
        (UB, UBL): 1,
        (UB, UBR): 1,
        (UBL, UL): 1,
        (UBR, UR): 1,
        (UF, UFL): 1,
        (UF, UFR): 1,
        (UFL, UL): 1,
        (UFR, UR): 1,
        # DIST 2 PAIRS
        (B, D): 2,
        (B, DL): 2,
        (B, DR): 2,
        (B, L): 2,
        (B, R): 2,
        (B, U): 2,
        (B, UL): 2,
        (B, UR): 2,
        (BL, BR): 2,
        (BL, D): 2,
        (BL, DB): 2,
        (BL, DBR): 2,
        (BL, DFL): 2,
        (BL, DL): 2,
        (BL, FL): 2,
        (BL, U): 2,
        (BL, UB): 2,
        (BL, UBR): 2,
        (BL, UFL): 2,
        (BL, UL): 2,
        (BR, D): 2,
        (BR, DB): 2,
        (BR, DBL): 2,
        (BR, DFR): 2,
        (BR, DR): 2,
        (BR, FR): 2,
        (BR, U): 2,
        (BR, UB): 2,
        (BR, UBL): 2,
        (BR, UFR): 2,
        (BR, UR): 2,
        (D, F): 2,
        (D, FL): 2,
        (D, FR): 2,
        (D, L): 2,
        (D, R): 2,
        (DB, DF): 2,
        (DB, DFL): 2,
        (DB, DFR): 2,
        (DB, DL): 2,
        (DB, DR): 2,
        (DB, L): 2,
        (DB, R): 2,
        (DB, UB): 2,
        (DB, UBL): 2,
        (DB, UBR): 2,
        (DBL, DBR): 2,
        (DBL, DF): 2,
        (DBL, DFL): 2,
        (DBL, DFR): 2,
        (DBL, DR): 2,
        (DBL, FL): 2,
        (DBL, UB): 2,
        (DBL, UBL): 2,
        (DBL, UBR): 2,
        (DBL, UFL): 2,
        (DBL, UL): 2,
        (DBR, DF): 2,
        (DBR, DFL): 2,
        (DBR, DFR): 2,
        (DBR, DL): 2,
        (DBR, FR): 2,
        (DBR, UB): 2,
        (DBR, UBL): 2,
        (DBR, UBR): 2,
        (DBR, UFR): 2,
        (DBR, UR): 2,
        (DF, DL): 2,
        (DF, DR): 2,
        (DF, FL): 2,
        (DF, FR): 2,
        (DF, L): 2,
        (DF, R): 2,
        (DF, UF): 2,
        (DF, UFL): 2,
        (DF, UFR): 2,
        (DFL, DFR): 2,
        (DFL, DR): 2,
        (DFL, FR): 2,
        (DFL, UBL): 2,
        (DFL, UF): 2,
        (DFL, UFL): 2,
        (DFL, UFR): 2,
        (DFL, UL): 2,
        (DFR, DL): 2,
        (DFR, FL): 2,
        (DFR, UBR): 2,
        (DFR, UF): 2,
        (DFR, UFL): 2,
        (DFR, UFR): 2,
        (DFR, UR): 2,
        (DL, DR): 2,
        (DL, F): 2,
        (DL, FL): 2,
        (DL, UBL): 2,
        (DL, UFL): 2,
        (DL, UL): 2,
        (DR, F): 2,
        (DR, FR): 2,
        (DR, UBR): 2,
        (DR, UFR): 2,
        (DR, UR): 2,
        (F, L): 2,
        (F, R): 2,
        (F, U): 2,
        (F, UL): 2,
        (F, UR): 2,
        (FL, FR): 2,
        (FL, U): 2,
        (FL, UBL): 2,
        (FL, UF): 2,
        (FL, UFR): 2,
        (FL, UL): 2,
        (FR, U): 2,
        (FR, UBR): 2,
        (FR, UF): 2,
        (FR, UFL): 2,
        (FR, UR): 2,
        (L, U): 2,
        (L, UB): 2,
        (L, UF): 2,
        (R, U): 2,
        (R, UB): 2,
        (R, UF): 2,
        (UB, UF): 2,
        (UB, UFL): 2,
        (UB, UFR): 2,
        (UB, UL): 2,
        (UB, UR): 2,
        (UBL, UBR): 2,
        (UBL, UF): 2,
        (UBL, UFL): 2,
        (UBL, UFR): 2,
        (UBL, UR): 2,
        (UBR, UF): 2,
        (UBR, UFL): 2,
        (UBR, UFR): 2,
        (UBR, UL): 2,
        (UF, UL): 2,
        (UF, UR): 2,
        (UFL, UFR): 2,
        (UFL, UR): 2,
        (UFR, UL): 2,
        (UL, UR): 2,
        # DIST 3 PAIRS
        (B, DF): 3,
        (B, DFL): 3,
        (B, DFR): 3,
        (B, FL): 3,
        (B, FR): 3,
        (B, UF): 3,
        (B, UFL): 3,
        (B, UFR): 3,
        (BL, DF): 3,
        (BL, DFR): 3,
        (BL, DR): 3,
        (BL, F): 3,
        (BL, R): 3,
        (BL, UF): 3,
        (BL, UFR): 3,
        (BL, UR): 3,
        (BR, DF): 3,
        (BR, DFL): 3,
        (BR, DL): 3,
        (BR, F): 3,
        (BR, L): 3,
        (BR, UF): 3,
        (BR, UFL): 3,
        (BR, UL): 3,
        (D, UB): 3,
        (D, UBL): 3,
        (D, UBR): 3,
        (D, UF): 3,
        (D, UFL): 3,
        (D, UFR): 3,
        (D, UL): 3,
        (D, UR): 3,
        (DB, F): 3,
        (DB, FL): 3,
        (DB, FR): 3,
        (DB, U): 3,
        (DB, UFL): 3,
        (DB, UFR): 3,
        (DB, UL): 3,
        (DB, UR): 3,
        (DBL, F): 3,
        (DBL, FR): 3,
        (DBL, R): 3,
        (DBL, U): 3,
        (DBL, UF): 3,
        (DBL, UR): 3,
        (DBR, F): 3,
        (DBR, FL): 3,
        (DBR, L): 3,
        (DBR, U): 3,
        (DBR, UF): 3,
        (DBR, UL): 3,
        (DF, U): 3,
        (DF, UBL): 3,
        (DF, UBR): 3,
        (DF, UL): 3,
        (DF, UR): 3,
        (DFL, R): 3,
        (DFL, U): 3,
        (DFL, UB): 3,
        (DFL, UR): 3,
        (DFR, L): 3,
        (DFR, U): 3,
        (DFR, UB): 3,
        (DFR, UL): 3,
        (DL, FR): 3,
        (DL, R): 3,
        (DL, U): 3,
        (DL, UB): 3,
        (DL, UBR): 3,
        (DL, UF): 3,
        (DL, UFR): 3,
        (DR, FL): 3,
        (DR, L): 3,
        (DR, U): 3,
        (DR, UB): 3,
        (DR, UBL): 3,
        (DR, UF): 3,
        (DR, UFL): 3,
        (F, UB): 3,
        (F, UBL): 3,
        (F, UBR): 3,
        (FL, R): 3,
        (FL, UB): 3,
        (FL, UBR): 3,
        (FL, UR): 3,
        (FR, L): 3,
        (FR, UB): 3,
        (FR, UBL): 3,
        (FR, UL): 3,
        (L, UBR): 3,
        (L, UFR): 3,
        (L, UR): 3,
        (R, UBL): 3,
        (R, UFL): 3,
        (R, UL): 3,
        # DIST 4 PAIRS
        (B, F): 4,
        (BL, FR): 4,
        (BR, FL): 4,
        (D, U): 4,
        (DB, UF): 4,
        (DBL, UFR): 4,
        (DBR, UFL): 4,
        (DF, UB): 4,
        (DFL, UBR): 4,
        (DFR, UBL): 4,
        (DL, UR): 4,
        (DR, UL): 4,
        (L, R): 4,
        # UNDEFINED DIST PAIRS
        # (any pair involving UNKNOWN; these are spelled out in both orders)
        (B, UNKNOWN): None,
        (BL, UNKNOWN): None,
        (BR, UNKNOWN): None,
        (D, UNKNOWN): None,
        (DB, UNKNOWN): None,
        (DBL, UNKNOWN): None,
        (DBR, UNKNOWN): None,
        (DF, UNKNOWN): None,
        (DFL, UNKNOWN): None,
        (DFR, UNKNOWN): None,
        (DL, UNKNOWN): None,
        (DR, UNKNOWN): None,
        (F, UNKNOWN): None,
        (FL, UNKNOWN): None,
        (FR, UNKNOWN): None,
        (L, UNKNOWN): None,
        (R, UNKNOWN): None,
        (U, UNKNOWN): None,
        (UB, UNKNOWN): None,
        (UBL, UNKNOWN): None,
        (UBR, UNKNOWN): None,
        (UF, UNKNOWN): None,
        (UFL, UNKNOWN): None,
        (UFR, UNKNOWN): None,
        (UL, UNKNOWN): None,
        (UNKNOWN, B): None,
        (UNKNOWN, BL): None,
        (UNKNOWN, BR): None,
        (UNKNOWN, D): None,
        (UNKNOWN, DB): None,
        (UNKNOWN, DBL): None,
        (UNKNOWN, DBR): None,
        (UNKNOWN, DF): None,
        (UNKNOWN, DFL): None,
        (UNKNOWN, DFR): None,
        (UNKNOWN, DL): None,
        (UNKNOWN, DR): None,
        (UNKNOWN, F): None,
        (UNKNOWN, FL): None,
        (UNKNOWN, FR): None,
        (UNKNOWN, L): None,
        (UNKNOWN, R): None,
        (UNKNOWN, U): None,
        (UNKNOWN, UB): None,
        (UNKNOWN, UBL): None,
        (UNKNOWN, UBR): None,
        (UNKNOWN, UF): None,
        (UNKNOWN, UFL): None,
        (UNKNOWN, UFR): None,
        (UNKNOWN, UL): None,
        (UNKNOWN, UR): None,
        (UR, UNKNOWN): None,
        (UNKNOWN, UNKNOWN): None,
    }
    # make distance symmetric
    # Iterate over a snapshot (list(...)) because DIST grows inside the loop.
    # NOTE: this loop runs in the class body, so the loop variables f1, f2
    # and d stay bound as class attributes after it finishes.
    for (f1, f2), d in list(DIST.items()):
        DIST[(f2, f1)] = d
def clean_tags():
    """
    Inspect the tags stored in the local Zotero sqlite database.

    The ``items``, ``tags``, ``itemTags``, ``itemData``, ``itemDataValues``
    and ``fields`` tables are loaded into pandas DataFrames.  The function
    then

    * counts how many items use each tag and collects the names of tags that
      are longer than 25 characters in ``bad_tags``,
    * checks the titles stored in sql against ``zotero.index``,
    * for every tag name that occurs on more than two tag rows, picks the tag
      id attached to the most items and records which other tag ids it would
      replace (``new_to_oldtags``),
    * computes how often each tag is used.

    Nothing is modified by default: every step that writes to the database or
    talks to the Zotero web API is either disabled with ``if False`` or lives
    in the nested ``clean_tags2`` helper, which is never called from here.
    The function returns ``None``; it is meant for interactive inspection.

    Raises:
        AssertionError: if the items do not all have the same number of title
            fields, or if an item has a title in ``zotero.index`` but none in
            the sql tables.
        KeyError: from ``clean_tags2`` / the disabled web-API section when the
            ``ZOTERO_API_KEY`` environment variable is not set.
    """
    zotero = get_libzotero()
    # sql cursor into the zotero database
    cur = zotero.cur
    if False:
        # Debug info about the sql tables used below.
        sorted(ut.util_sqlite.get_tablenames(cur))
        ut.print_database_structure(cur)

        # The `tags` table stores all tags
        # The itemTags table stores the association between items and tags
        ut.get_table_columninfo_list(cur, 'fields')
        ut.get_table_columninfo_list(cur, 'fieldsCombined')

        ut.get_table_columninfo_list(cur, 'itemData')
        ut.get_table_columninfo_list(cur, 'itemDataValues')

        ut.get_table_columninfo_list(cur, 'tags')
        ut.get_table_columninfo_list(cur, 'itemTags')

    import pandas as pd
    pd.options.display.max_colwidth = 40
    pd.options.display.max_rows = 20

    def pandas_sql(table, columns):
        """Load the requested columns of an sql table into a DataFrame."""
        return pd.DataFrame(ut.get_table_rows(cur, table, columns),
                            columns=columns)

    item_df = pandas_sql('items', ('itemID', 'itemTypeID', 'libraryID', 'key')).set_index('itemID', drop=False)
    tags_df = pandas_sql('tags', ('tagID', 'name', 'type', 'libraryID', 'key')).set_index('tagID', drop=False)
    itemData_df = pandas_sql('itemData', ('itemID', 'fieldID', 'valueID'))

    itemTag_df = pandas_sql('itemTags', ('itemID', 'tagID'))

    itemDataValues_df = pandas_sql('itemDataValues', ('valueID', 'value')).set_index('valueID')
    field_df = pandas_sql('fields', ('fieldID', 'fieldName', 'fieldFormatID')).set_index('fieldID')

    # Resolve the value and the field name of every itemData row.
    itemData_df['value'] = itemDataValues_df['value'].loc[itemData_df['valueID'].values].values
    itemData_df['fieldName'] = field_df['fieldName'].loc[itemData_df['fieldID'].values].values

    titles = itemData_df[itemData_df['fieldName'] == 'title']
    titles_per_item = ut.map_vals(len, titles.groupby('itemID').indices)
    assert len(ut.unique(titles_per_item.values())) == 1, (
        'expected every item to have the same number of title fields')

    # Find how often each tag is used
    tagid_to_count = itemTag_df.groupby('tagID').count()
    tagid_to_count = tagid_to_count.rename(columns={'itemID': 'nItems'})
    tagid_to_count['name'] = tags_df.loc[tagid_to_count.index]['name']
    tagid_to_count = tagid_to_count.sort_values('nItems')

    # Tags with very long names are considered bad.
    tagid_to_count['tag_ncharsize'] = tagid_to_count['name'].apply(len)
    tagid_to_count = tagid_to_count.sort_values('tag_ncharsize')
    bad_tags = tagid_to_count[tagid_to_count['tag_ncharsize'] > 25]['name'].values.tolist()

    def clean_tags2():
        """
        Delete ``bad_tags`` from the online library through pyzotero.

        Defined for manual use only; ``clean_tags`` never calls it.
        """
        import os
        from pyzotero import zotero as pyzotero
        # The API key is a credential and must not be committed to source
        # control; it is read from the environment instead.
        api_key = os.environ['ZOTERO_API_KEY']
        library_id = '1279414'
        library_type = 'user'
        zot = pyzotero.Zotero(library_id, library_type, api_key)

        for chunk in ut.ProgChunks(bad_tags, 50):
            zot.delete_tags(*chunk)

    if False:
        # Disabled experiment: find the tags that are used by exactly one
        # item and delete them through the raw Zotero web API.
        # All imports come first: an import anywhere in this function makes
        # the name function-local, so using it before the import statement
        # would raise UnboundLocalError.
        import os
        import requests
        try:
            from urllib import quote  # Python 2
        except ImportError:
            from urllib.parse import quote  # Python 3

        api_key = os.environ['ZOTERO_API_KEY']  # credential, see clean_tags2
        base_url = 'https://api.zotero.org'
        user_id = '1279414'
        userOrGroupPrefix = '/users/' + user_id
        params = {'v': 3, 'key': api_key}

        items_resp = requests.get(base_url + userOrGroupPrefix + '/items', params=params)
        print(items_resp.content)
        print(items_resp)

        # Page through every tag of the library by following 'next' links.
        json_tags = []
        get_url = base_url + userOrGroupPrefix + '/tags'
        while True:
            print('get_url = %r' % (get_url,))
            tag_resp = requests.get(get_url, params=params)
            if tag_resp.status_code != 200:
                break
            json_tags.extend(tag_resp.json())
            if 'next' in tag_resp.links:
                get_url = tag_resp.links['next']['url']
            else:
                break

        # Url-quoted names of all tags that are attached to a single item.
        single_use_tags = []
        for tag in ut.ProgIter(json_tags, label='parsing tags'):
            if tag['meta']['numItems'] == 1:
                try:
                    single_use_tags.append(quote(tag['tag']))
                except Exception:
                    # quoting can fail for tags that cannot be encoded
                    print('cant encode tag=%r' % (tag,))

        # NOTE(review): the original also fetched the items matching every
        # chunk of tags to fill a ``version_to_tags`` table.  That table was
        # never read and the code indexed a stale loop variable, so the
        # lookup was dropped.  The Zotero write API may additionally require
        # an ``If-Unmodified-Since-Version`` header -- confirm before
        # enabling this section.

        # DELETE MULTIPLE TAGS
        for chunk in ut.ichunks(single_use_tags, 50):
            # the names in ``single_use_tags`` are already url-quoted
            delete_url = base_url + userOrGroupPrefix + '/tags?tag=' + ' || '.join(chunk)
            print('delete_url = %r' % (delete_url,))
            resp = requests.delete(delete_url, params=params)
            print(resp)

        # A local-database alternative was sketched here as raw sql (it was
        # never valid Python, so it is kept as a note only):
        #   delete from itemTags where tagID in (select tagID from tags where type=1);

    # Compare the titles stored in sql with the ones known to libzotero.
    item_df['title'] = titles.set_index('itemID')['value']
    for _, item in zotero.index.items():
        sql_title = item_df.loc[item.id]['title']
        if item.title == sql_title:
            continue
        if pd.isnull(sql_title) and item.title is not None:
            print(item.__dict__)
            print(item_df.loc[item.id])
            print('item.title = %r' % (item.title,))
            print('sql_title = %r' % (sql_title,))
            # raised explicitly so that the check also fires under ``-O``
            raise AssertionError(
                'item %r has a title in zotero.index but not in sql' % (item.id,))

    # NOTE(review): only names with MORE than two tag rows are treated as
    # duplicates (kept from the original); confirm that ``> 2`` rather than
    # ``> 1`` is intended.
    tags_by_name = tags_df.groupby('name', sort=True)
    duplicate_tags = [
        (name, idxs) for name, idxs in tags_by_name.indices.items() if len(idxs) > 2
    ]
    tagname_to_tagid = tags_by_name.first()
    new_to_oldtags = {}
    # Determine which tag id to use for each duplicated name
    for tagname, idxs in duplicate_tags:
        tags_subdf = tags_df.iloc[idxs]
        mapping = itemTag_df[itemTag_df['tagID'].isin(tags_subdf['tagID'])]
        # number of items attached to each of the duplicate tag ids
        tag_hist = mapping.groupby('tagID').count()
        if tag_hist.empty:
            # None of the duplicates is attached to an item, so there is no
            # best id to pick (idxmax would raise on the empty histogram).
            continue
        best_tagid = tag_hist['itemID'].idxmax()

        # The ids to replace are the OTHER tag ids, i.e. the index of the
        # histogram.  (Its 'itemID' column holds item counts, not ids.)
        new_to_oldtags[best_tagid] = set(tag_hist.index) - {best_tagid}

        tagname_to_tagid.loc[tagname] = tags_df.loc[best_tagid]

    if False:
        # Disabled: point the item/tag associations at the chosen tag ids.
        import sqlite3
        cmd = 'UPDATE itemTags SET tagID=? WHERE tagID=?'
        for newid, oldids in new_to_oldtags.items():
            for oldid in oldids:
                # The ids come out of pandas; cast to plain ints because
                # sqlite3 presumably cannot bind numpy integer types.
                args = (int(newid), int(oldid))
                try:
                    print('(%s) args = %r' % (cmd, args,))
                    cur.execute(cmd, args)
                    print(cur.fetchall())
                except sqlite3.IntegrityError:
                    # best effort: report the failure and try the next id
                    print('error')

    # Materialize the rows because they are iterated twice below.
    item_tag_pairs = list(ut.get_table_rows(cur, 'itemTags', ('itemID', 'tagID')))
    # NOTE(review): the two names below follow the original code; confirm
    # which element of a pair ``ut.group_pairs`` uses as the group key.
    # Group tags by item
    itemid_to_tagids = ut.group_pairs(item_tag_pairs)
    # Group items by tags
    tagid_to_itemids = ut.group_pairs(
        [(tagid, itemid) for itemid, tagid in item_tag_pairs])

    # mapping from tagid to name
    tagid_to_name = dict(ut.get_table_rows(cur, 'tags', ('tagID', 'name')))

    # (tagid, number of grouped entries) pairs sorted by that number
    tagid_freq = list(ut.sort_dict(ut.map_vals(len, tagid_to_itemids), 'vals').items())
    # the same frequencies, summed over all tag ids that share a name
    tagname_total_freq = ut.sort_dict(
        ut.map_vals(sum, ut.group_pairs(
            [(freq, tagid_to_name.get(tagid, tagid)) for tagid, freq in tagid_freq])),
        'vals')
    # ``tagid_freq`` is a list of pairs rather than a dict, so the renamed
    # pairs are built directly; ids without a known name are kept as they are.
    tagname_freq = [(tagid_to_name.get(tagid, tagid), freq)
                    for tagid, freq in tagid_freq]
Beispiel #7
0
def demo_refresh():
    r"""
    Feed oracle reviews of demo data to a RefreshCriteria and plot the result.

    Candidate edges of a demo inference object are reviewed in order of
    decreasing predicted score until ``RefreshCriteria.check`` fires.  The
    estimated probability that any positive edge remains and the fraction of
    positive edges that actually remain are plotted against the number of
    reviews, together with the criterion threshold.

    CommandLine:
        python -m ibeis.algo.graph.refresh demo_refresh \
                --num_pccs=40 --size=2 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.graph.refresh import *  # NOQA
        >>> demo_refresh()
        >>> ut.show_if_requested()
    """
    from ibeis.algo.graph import demo

    demo_cfg = ut.argparse_dict({'num_pccs': 50, 'size': 4})
    refresh_cfg = ut.argparse_funckw(RefreshCriteria)

    # Build the inference object and rank its candidate edges, best first
    infr = demo.demodata_infr(size_std=0, **demo_cfg)
    candidates = list(infr.dummy_verif.find_candidate_edges(K=100))
    scores = np.array(infr.dummy_verif.predict_edges(candidates))
    order = scores.argsort()[::-1]
    ranked_edges = ut.take(candidates, order)
    scores = scores[order]

    # Positive-match flag of every ranked edge, and the number of positives
    # from each position up to the end of the ranking
    is_positive = infr.match_state_df(ranked_edges)[POSTV].values
    positives_left = is_positive[::-1].cumsum()[::-1]

    # Do oracle reviews and wait to converge
    criterion = RefreshCriteria(**refresh_cfg)
    num_reviews = []
    est_prob_remain = []
    true_frac_remain = []
    for position, (_edge, flag) in enumerate(zip(ranked_edges, is_positive)):
        criterion.add(flag, user_id='user:oracle')
        true_frac_remain.append(positives_left[position] / positives_left[0])
        est_prob_remain.append(criterion.prob_any_remain())
        num_reviews.append(position + 1)
        if criterion.check():
            break

    curves = ut.odict()
    curves['Est. probability any remain'] = est_prob_remain
    curves['Fraction remaining'] = true_frac_remain

    # Everything below needs a display
    ut.quit_if_noshow()
    import plottool_ibeis as pt
    pt.qtensure()
    from ibeis.scripts.thesis import TMP_RC
    import matplotlib as mpl
    mpl.rcParams.update(TMP_RC)
    pt.multi_plot(
        num_reviews,
        curves,
        xlabel='# manual reviews',
        rcParams=TMP_RC,
        marker='',
        ylim=(0, 1),
        use_legend=False,
    )

    # Annotate the plot with the demo and criterion settings
    renamed_demo_cfg = ut.map_keys(
        {'num_pccs': '#PCC', 'size': 'PCC size'}, demo_cfg)
    thresh = refresh_cfg.pop('thresh')
    refresh_cfg['span'] = refresh_cfg.pop('window')

    demo_label = ut.get_cfg_lbl(renamed_demo_cfg, sep=' ')[1:]
    pt.relative_text((.02, .58), demo_label, valign='bottom')
    refresh_label = ut.get_cfg_lbl(refresh_cfg, sep=' ')[1:]
    pt.relative_text((.02, .68), refresh_label, valign='bottom')

    legend = pt.gca().legend()
    legend.get_frame().set_alpha(1.0)
    # Horizontal line at the threshold, drawn after the legend was created
    first_x, last_x = num_reviews[0], num_reviews[-1]
    pt.plt.plot([first_x, last_x], [thresh, thresh], 'g--', label='thresh')