Ejemplo n.º 1
0
    def query_field_of_study_for_paperid(self,
                                         paperid,
                                         table=None,
                                         col_paperid=None,
                                         col_fos=None):
        """Look up the Field of Study (FOS) IDs linked to one or more papers

        :paperid: paper ID (a list of paper IDs is also accepted)
        :table: table object or name of table mapping papers to FOS
        (defaults to `self.tblname_paper_fos`)
        :col_paperid: column (or name of column) for paper ID in the FOS table
        (defaults to `self.colname_paperid`)
        :col_fos: column (or name of column) for Field of Study ID in the FOS
        table (defaults to `self.colname_paperfield`)
        :returns: list of FOS IDs (strings), one entry per matching row

        """
        # resolve defaults first, then turn names into table/column objects
        table_ref = self.tblname_paper_fos if table is None else table
        paper_ref = self.colname_paperid if col_paperid is None else col_paperid
        fos_ref = self.colname_paperfield if col_fos is None else col_fos
        fos_table = self._get_table(table_ref)
        paper_column = self._get_col(paper_ref, fos_table)
        fos_column = self._get_col(fos_ref, fos_table)

        wanted_ids = parse_id(paperid)

        # fetch only the FOS column for rows whose paper ID was requested
        query = fos_table.select().with_only_columns([fos_column])
        query = query.where(paper_column.in_(wanted_ids))
        records = self.engine.execute(query).fetchall()
        return [str(record[0]) for record in records]
Ejemplo n.º 2
0
    def get_paperids_from_authorid(self, authorids,
                            table=None,
                            col_authorid=None,
                            col_paperid=None,
                            return_df=False):
        """Return the paper IDs associated with an author ID or list

        :authorids: author ID or list
        :table: table object or name of table mapping Author IDs to Paper IDs
        (defaults to `self.tblname_paper_authors`)
        :col_authorid: column (or name of column) in the table for Author ID
        (defaults to `self.colname_authorid`)
        :col_paperid: column (or name of column) in the table for Paper ID
        (defaults to `self.colname_paperid`)
        :return_df: return a dataframe rather than a series (default False)
        :returns: the full DataFrame of matching rows if `return_df` is truthy;
        otherwise a Series of paper IDs (an empty Series if nothing matched)

        """
        if table is None:
            table = self.tblname_paper_authors
        if col_authorid is None:
            col_authorid = self.colname_authorid
        if col_paperid is None:
            col_paperid = self.colname_paperid

        tbl = self._get_table(table)
        col_authorid = self._get_col(col_authorid, tbl)
        col_paperid = self._get_col(col_paperid, tbl)

        authorids = parse_id(authorids)

        sq = tbl.select(col_authorid.in_(authorids))
        result = self.read_sql(sq)
        if return_df:
            return result
        if result.empty:
            return pd.Series()
        # `.ix` was deprecated in pandas 0.20 and removed in 1.0. The lookup
        # here is by column label, so `.loc` is the direct replacement.
        return result.loc[:, col_paperid.name]
Ejemplo n.º 3
0
    def query_nodes(self,
                    paperids,
                    table=None,
                    col_paperid=None,
                    return_type='dataframe'):
        """Query the nodes table (title, publication year, etc.) for a paper ID or list

        :paperids: Paper ID or list
        :table: table object or name of table for nodes (papers)
        (defaults to `self.tblname_nodes`)
        :col_paperid: column (or name of column) in the table for Paper ID
        (defaults to `self.colname_paperid`)
        :return_type: {'dataframe', 'dict'} any value starting with 'dict'
        (case-insensitive) returns a dictionary {index -> {column -> value}};
        anything else (default 'dataframe') returns the dataframe
        :returns: nodes dataframe indexed by paper ID (the paper ID column is
        also kept as a regular column), or the equivalent dictionary

        """
        table_ref = self.tblname_nodes if table is None else table
        id_ref = self.colname_paperid if col_paperid is None else col_paperid

        nodes_table = self._get_table(table_ref)
        id_column = self._get_col(id_ref, nodes_table)

        wanted_ids = parse_id(paperids)

        query = nodes_table.select(id_column.in_(wanted_ids))
        nodes = self.read_sql(query).set_index(id_column.name, drop=False)
        wants_dict = return_type.lower().startswith('dict')
        return nodes.to_dict(orient='index') if wants_dict else nodes
Ejemplo n.º 4
0
    def query_for_venue_counts(self, paperids,
            table=None,
            col_venue=None,
            col_paperid=None):
        """Count the given papers per venue
        Venues are e.g. journals, specified by the 'col_venue' param

        :paperids: paperid or list of paperids
        :table: table object or name of table (defaults to `self.tblname_nodes`)
        :col_venue: column (or name of column) to group and count by
        (defaults to the first entry of `self.colname_venue`)
        :col_paperid: column (or name of column) with paper ids
        (defaults to `self.colname_paperid`)
        :returns: SQLAlchemy result of executing the query, with columns
        (count, venue_id)

        """
        table_ref = self.tblname_nodes if table is None else table
        venue_ref = self.colname_venue[0] if col_venue is None else col_venue
        paper_ref = self.colname_paperid if col_paperid is None else col_paperid
        wanted_ids = parse_id(paperids)
        nodes_table = self._get_table(table_ref)
        venue_column = self._get_col(venue_ref, nodes_table)
        paper_column = self._get_col(paper_ref, nodes_table)
        query = (
            nodes_table.count(paper_column.in_(wanted_ids))
            .where(venue_column > 0)  # TODO: THIS MAY NOT WORK WITH STRING IDS
            .group_by(venue_column)
            .column(venue_column)
        )
        return self.engine.execute(query)
Ejemplo n.º 5
0
    def query_nodes(self, paperids,
                        table=None,
                        col_paperid=None,
                        return_type='dataframe'):
        """Query the nodes table (title, publication year, etc.) for a paper ID or list

        :paperids: Paper ID or list
        :table: table object or name of table for nodes (papers)
        (defaults to `self.tblname_nodes`)
        :col_paperid: column (or name of column) in the table for Paper ID
        (defaults to `self.colname_paperid`)
        :return_type: {'dataframe', 'dict'} any value starting with 'dict'
        (case-insensitive) returns a dictionary {index -> {column -> value}};
        anything else (default 'dataframe') returns the dataframe
        :returns: nodes dataframe indexed by paper ID (the paper ID column is
        also kept as a regular column), or the equivalent dictionary

        """
        table_ref = self.tblname_nodes if table is None else table
        id_ref = self.colname_paperid if col_paperid is None else col_paperid

        nodes_table = self._get_table(table_ref)
        id_column = self._get_col(id_ref, nodes_table)

        wanted_ids = parse_id(paperids)

        query = nodes_table.select(id_column.in_(wanted_ids))
        nodes = self.read_sql(query).set_index(id_column.name, drop=False)
        wants_dict = return_type.lower().startswith('dict')
        return nodes.to_dict(orient='index') if wants_dict else nodes
Ejemplo n.º 6
0
    def query_field_of_study_for_paperid(self, paperid,
                        table=None,
                        col_paperid=None,
                        col_fos=None):
        """Look up the Field of Study (FOS) IDs linked to one or more papers

        :paperid: paper ID (a list of paper IDs is also accepted)
        :table: table object or name of table mapping papers to FOS
        (defaults to `self.tblname_paper_fos`)
        :col_paperid: column (or name of column) for paper ID in the FOS table
        (defaults to `self.colname_paperid`)
        :col_fos: column (or name of column) for Field of Study ID in the FOS
        table (defaults to `self.colname_paperfield`)
        :returns: list of FOS IDs (strings), one entry per matching row

        """
        # resolve defaults first, then turn names into table/column objects
        table_ref = self.tblname_paper_fos if table is None else table
        paper_ref = self.colname_paperid if col_paperid is None else col_paperid
        fos_ref = self.colname_paperfield if col_fos is None else col_fos
        fos_table = self._get_table(table_ref)
        paper_column = self._get_col(paper_ref, fos_table)
        fos_column = self._get_col(fos_ref, fos_table)

        wanted_ids = parse_id(paperid)

        # fetch only the FOS column for rows whose paper ID was requested
        query = fos_table.select().with_only_columns([fos_column])
        query = query.where(paper_column.in_(wanted_ids))
        records = self.engine.execute(query).fetchall()
        return [str(record[0]) for record in records]
Ejemplo n.º 7
0
    def query_for_venue_counts(self,
                               paperids,
                               table=None,
                               col_venue=None,
                               col_paperid=None):
        """Count the given papers per venue
        Venues are e.g. journals, specified by the 'col_venue' param

        :paperids: paperid or list of paperids
        :table: table object or name of table (defaults to `self.tblname_nodes`)
        :col_venue: column (or name of column) to group and count by
        (defaults to the first entry of `self.colname_venue`)
        :col_paperid: column (or name of column) with paper ids
        (defaults to `self.colname_paperid`)
        :returns: SQLAlchemy result of executing the query, with columns
        (count, venue_id)

        """
        table_ref = self.tblname_nodes if table is None else table
        venue_ref = self.colname_venue[0] if col_venue is None else col_venue
        paper_ref = self.colname_paperid if col_paperid is None else col_paperid
        wanted_ids = parse_id(paperids)
        nodes_table = self._get_table(table_ref)
        venue_column = self._get_col(venue_ref, nodes_table)
        paper_column = self._get_col(paper_ref, nodes_table)
        query = (
            nodes_table.count(paper_column.in_(wanted_ids))
            .where(venue_column > 0)  # TODO: THIS MAY NOT WORK WITH STRING IDS
            .group_by(venue_column)
            .column(venue_column)
        )
        return self.engine.execute(query)
Ejemplo n.º 8
0
    def query_toplevel_fields_for_field_ids(self,
                                            fosids,
                                            table=None,
                                            col_fosid=None,
                                            col_toplevelid=None,
                                            weighted=True,
                                            col_weight=None):
        """Given a list of Field of Study (FOS) IDs, return a counter of toplevel FOS IDs with weights

        :fosids: Field of Study ID or list
        :table: table object or name of table mapping FOS IDs to Top Level IDs
        (defaults to `self.tblname_toplevelfield`)
        :col_fosid: column (or name of column) in the toplevel table for (lower level) FOS
        (defaults to `self.colname_fosid`)
        :col_toplevelid: column (or name of column) in the toplevel table for top level FOS
        (defaults to `self.colname_toplevel`)
        :weighted: if true, will use the value of `col_weight` column in counting FOS. Default True
        :col_weight: column (or name of column) in the toplevel table with weights (e.g. 'Confidence')
        (defaults to `self.colname_fosweight`; only used when `weighted` is true)
        :returns: counter with keys: (toplevel) Field of Study ID and values: weights.
        Requested IDs that match no row are counted as themselves with weight 1

        """
        if table is None:
            table = self.tblname_toplevelfield
        if col_fosid is None:
            col_fosid = self.colname_fosid
        if col_toplevelid is None:
            col_toplevelid = self.colname_toplevel
        if weighted and col_weight is None:
            col_weight = self.colname_fosweight
        tbl = self._get_table(table)
        col_fosid = self._get_col(col_fosid, tbl)
        col_toplevelid = self._get_col(col_toplevelid, tbl)
        if weighted:
            col_weight = self._get_col(col_weight, tbl)

        fosids = parse_id(fosids)

        sq = tbl.select().with_only_columns([col_fosid, col_toplevelid])
        if weighted:
            sq = sq.column(col_weight)
        sq = sq.where(col_fosid.in_(fosids))
        r = self.engine.execute(sq)

        fos_count = Counter()
        # a set gives O(1) membership tests in the leftover check below
        matched = set()
        # Iterate the result directly instead of looping `rowcount` times:
        # `rowcount` is not reliable for SELECT statements (some drivers,
        # e.g. sqlite3, report -1), which would silently skip every row.
        for row in r:
            matched.add(str(row[col_fosid]))
            increment = float(row[col_weight]) if weighted else 1
            fos_count[str(row[col_toplevelid])] += increment
        # assume any leftovers are top level and should be added back in
        # NOTE(review): `matched` holds strings, so this comparison presumably
        # relies on `parse_id` returning string IDs -- confirm
        for fosid in fosids:
            if fosid not in matched:
                fos_count[fosid] += 1
        return fos_count
Ejemplo n.º 9
0
    def query_toplevel_fields_for_field_ids(self, fosids,
                        table=None,
                        col_fosid=None,
                        col_toplevelid=None,
                        weighted=True,
                        col_weight=None):
        """Given a list of Field of Study (FOS) IDs, return a counter of toplevel FOS IDs with weights

        :fosids: Field of Study ID or list
        :table: table object or name of table mapping FOS IDs to Top Level IDs
        (defaults to `self.tblname_toplevelfield`)
        :col_fosid: column (or name of column) in the toplevel table for (lower level) FOS
        (defaults to `self.colname_fosid`)
        :col_toplevelid: column (or name of column) in the toplevel table for top level FOS
        (defaults to `self.colname_toplevel`)
        :weighted: if true, will use the value of `col_weight` column in counting FOS. Default True
        :col_weight: column (or name of column) in the toplevel table with weights (e.g. 'Confidence')
        (defaults to `self.colname_fosweight`; only used when `weighted` is true)
        :returns: counter with keys: (toplevel) Field of Study ID and values: weights.
        Requested IDs that match no row are counted as themselves with weight 1

        """
        if table is None:
            table = self.tblname_toplevelfield
        if col_fosid is None:
            col_fosid = self.colname_fosid
        if col_toplevelid is None:
            col_toplevelid = self.colname_toplevel
        if weighted and col_weight is None:
            col_weight = self.colname_fosweight
        tbl = self._get_table(table)
        col_fosid = self._get_col(col_fosid, tbl)
        col_toplevelid = self._get_col(col_toplevelid, tbl)
        if weighted:
            col_weight = self._get_col(col_weight, tbl)

        fosids = parse_id(fosids)

        sq = tbl.select().with_only_columns([col_fosid, col_toplevelid])
        if weighted:
            sq = sq.column(col_weight)
        sq = sq.where(col_fosid.in_(fosids))
        r = self.engine.execute(sq)

        fos_count = Counter()
        # a set gives O(1) membership tests in the leftover check below
        matched = set()
        # Iterate the result directly instead of looping `rowcount` times:
        # `rowcount` is not reliable for SELECT statements (some drivers,
        # e.g. sqlite3, report -1), which would silently skip every row.
        for row in r:
            matched.add(str(row[col_fosid]))
            increment = float(row[col_weight]) if weighted else 1
            fos_count[str(row[col_toplevelid])] += increment
        # assume any leftovers are top level and should be added back in
        # NOTE(review): `matched` holds strings, so this comparison presumably
        # relies on `parse_id` returning string IDs -- confirm
        for fosid in fosids:
            if fosid not in matched:
                fos_count[fosid] += 1
        return fos_count
Ejemplo n.º 10
0
    def get_EF(self, paperid):
        """Given a paper id, return the Eigenfactor score

        :paperid: paper ID (passed through `parse_id` before querying)
        :returns: value of the 'EF' column from the 'rank' table, taken from
        the first row of the result via `scalar()`

        """
        rank_table = self._get_table('rank')
        id_column = self._get_col(self.colname_paperid, rank_table)
        ef_column = self._get_col('EF', rank_table)
        wanted_ids = parse_id(paperid)
        # restrict to the requested paper(s) and keep only the EF column
        query = rank_table.select(id_column.in_(wanted_ids))
        query = query.with_only_columns([ef_column])
        result = self.engine.execute(query)
        return result.scalar()
Ejemplo n.º 11
0
    def get_EF(self, paperid):
        """Given a paper id, return the Eigenfactor score

        :paperid: paper ID (passed through `parse_id` before querying)
        :returns: value of the 'EF' column from the 'rank' table, taken from
        the first row of the result via `scalar()`

        """
        rank_table = self._get_table('rank')
        id_column = self._get_col(self.colname_paperid, rank_table)
        ef_column = self._get_col('EF', rank_table)
        wanted_ids = parse_id(paperid)
        # restrict to the requested paper(s) and keep only the EF column
        query = rank_table.select(id_column.in_(wanted_ids))
        query = query.with_only_columns([ef_column])
        result = self.engine.execute(query)
        return result.scalar()
Ejemplo n.º 12
0
    def get_author_id_list(self, paperid):
        """Given a paper id, return a list of author IDs

        :paperid: paper ID (passed through `parse_id` before querying)
        :returns: list with the author ID of every row in the paper-authors
        table (`self.tblname_paper_authors`) whose paper ID matches

        """
        authorship = self._get_table(self.tblname_paper_authors)
        paper_column = self._get_col(self.colname_paperid, authorship)
        author_column = self._get_col(self.colname_authorid, authorship)
        wanted_ids = parse_id(paperid)
        # restrict to the requested paper(s) and keep only the author column
        query = authorship.select(paper_column.in_(wanted_ids))
        query = query.with_only_columns([author_column])
        records = self.engine.execute(query).fetchall()
        return [record[0] for record in records]
Ejemplo n.º 13
0
    def get_author_id_list(self, paperid):
        """Given a paper id, return a list of author IDs

        :paperid: paper ID (passed through `parse_id` before querying)
        :returns: list with the author ID of every row in the paper-authors
        table (`self.tblname_paper_authors`) whose paper ID matches

        """
        authorship = self._get_table(self.tblname_paper_authors)
        paper_column = self._get_col(self.colname_paperid, authorship)
        author_column = self._get_col(self.colname_authorid, authorship)
        wanted_ids = parse_id(paperid)
        # restrict to the requested paper(s) and keep only the author column
        query = authorship.select(paper_column.in_(wanted_ids))
        query = query.with_only_columns([author_column])
        records = self.engine.execute(query).fetchall()
        return [record[0] for record in records]
    def get_venue_counts(self,
                         paperids,
                         colname_venue=None,
                         query_threshold=100000):
        """Get the list of venue counts for paperids in the shape of venue_ids

        :paperids: paperid or list of paperids (a pandas Series is used as is)
        :colname_venue: list of venue column names (should be a list of length
        1 if only one column). defaults to the database's colname_venue
        property
        :query_threshold: maximum number of paper IDs per query. will do
        multiple queries if this threshold is exceeded.
        :returns: pandas Series of integers indexed by `self.venue_ids`
        (venues with no count are 0), or None if the resulting frame is empty

        """
        if colname_venue is None:
            colname_venue = self._db.colname_venue
        if not isinstance(paperids, pd.Series):
            paperids = pd.Series(parse_id(paperids))

        # one single-letter prefix per venue column
        # NOTE(review): `string.letters` exists only on Python 2 and is locale
        # dependent; left unchanged because other code presumably relies on
        # the same prefixes -- confirm before porting to `string.ascii_letters`
        venue_prefixes = {
            string.letters[i]: name for i, name in enumerate(colname_venue)
        }

        venue_counts = Counter()
        num_papers = len(paperids)
        lower = 0
        # always run at least one batch, then continue while IDs remain
        while True:
            subset = paperids.iloc[lower:lower + query_threshold]
            for prefix, thiscolname in venue_prefixes.items():
                r = self._db.query_for_venue_counts(subset,
                                                    col_venue=thiscolname)
                venue_counts = self._update_counter(venue_counts, r, prefix)
            lower += query_threshold
            # `>=` (not `>`): when the number of IDs is an exact multiple of
            # the threshold there is nothing left, so skip the empty batch
            if lower >= num_papers:
                break

        counts_df = pd.DataFrame.from_dict(venue_counts, orient='index')
        counts_df = counts_df.reindex(index=self.venue_ids)
        counts_df = counts_df.fillna(value=0)
        if counts_df.empty:
            return None
        # `.ix` was removed in pandas 1.0; take the single count column by position
        return counts_df.iloc[:, 0].astype(int)
    def get_venue_counts(self, paperids, colname_venue=None, query_threshold=100000):
        """Get the list of venue counts for paperids in the shape of venue_ids

        :paperids: paperid or list of paperids (a pandas Series is used as is)
        :colname_venue: list of venue column names (should be a list of length
        1 if only one column). defaults to the database's colname_venue
        property
        :query_threshold: maximum number of paper IDs per query. will do
        multiple queries if this threshold is exceeded.
        :returns: pandas Series of integers indexed by `self.venue_ids`
        (venues with no count are 0), or None if the resulting frame is empty

        """
        if colname_venue is None:
            colname_venue = self._db.colname_venue
        if not isinstance(paperids, pd.Series):
            paperids = pd.Series(parse_id(paperids))

        # one single-letter prefix per venue column
        # NOTE(review): `string.letters` exists only on Python 2 and is locale
        # dependent; left unchanged because other code presumably relies on
        # the same prefixes -- confirm before porting to `string.ascii_letters`
        venue_prefixes = {
            string.letters[i]: name for i, name in enumerate(colname_venue)
        }

        venue_counts = Counter()
        num_papers = len(paperids)
        lower = 0
        # always run at least one batch, then continue while IDs remain
        while True:
            subset = paperids.iloc[lower:lower + query_threshold]
            for prefix, thiscolname in venue_prefixes.items():
                r = self._db.query_for_venue_counts(subset, col_venue=thiscolname)
                venue_counts = self._update_counter(venue_counts, r, prefix)
            lower += query_threshold
            # `>=` (not `>`): when the number of IDs is an exact multiple of
            # the threshold there is nothing left, so skip the empty batch
            if lower >= num_papers:
                break

        counts_df = pd.DataFrame.from_dict(venue_counts, orient='index')
        counts_df = counts_df.reindex(index=self.venue_ids)
        counts_df = counts_df.fillna(value=0)
        if counts_df.empty:
            return None
        # `.ix` was removed in pandas 1.0; take the single count column by position
        return counts_df.iloc[:, 0].astype(int)
Ejemplo n.º 16
0
    def get_toplevel_cluster(self, paperid):
        """Given a paper id, return the toplevel infomap cluster

        :paperid: paper ID (passed through `parse_id` before querying)
        :returns: the part of the 'cl' value before the first ':' for the
        first matching row of the 'tree' table, or the integer 0 if no row
        matches

        """
        tree_table = self._get_table('tree')
        id_column = self._get_col(self.colname_paperid, tree_table)
        cluster_column = self._get_col('cl', tree_table)
        wanted_ids = parse_id(paperid)
        query = tree_table.select(id_column.in_(wanted_ids))
        query = query.with_only_columns([cluster_column])
        first_row = self.engine.execute(query).fetchone()
        if not first_row:
            return 0
        # the cluster value is ':'-separated; the leading component is the top level
        return first_row[0].split(':')[0]
Ejemplo n.º 17
0
    def get_toplevel_cluster(self, paperid):
        """Given a paper id, return the toplevel infomap cluster

        :paperid: paper ID (passed through `parse_id` before querying)
        :returns: the part of the 'cl' value before the first ':' for the
        first matching row of the 'tree' table, or the integer 0 if no row
        matches

        """
        tree_table = self._get_table('tree')
        id_column = self._get_col(self.colname_paperid, tree_table)
        cluster_column = self._get_col('cl', tree_table)
        wanted_ids = parse_id(paperid)
        query = tree_table.select(id_column.in_(wanted_ids))
        query = query.with_only_columns([cluster_column])
        first_row = self.engine.execute(query).fetchone()
        if not first_row:
            return 0
        # the cluster value is ':'-separated; the leading component is the top level
        return first_row[0].split(':')[0]
Ejemplo n.º 18
0
    def query_for_citations(self,
                            paperids,
                            direction='out',
                            table=None,
                            col_citing=None,
                            col_cited=None,
                            return_df=False):
        """Take a list of paper ids and return the citations as either a Series or DataFrame

        :paperids: paperid or list of paperids
        :direction: 'out' (default) for out-citations. 'in' (case-insensitive)
        for incoming citations
        :table: table object or name of table (defaults to `self.tblname_links`)
        :col_citing: column (or name of column) for citing papers
        (defaults to `self.colname_citing`)
        :col_cited: column (or name of column) for cited papers
        (defaults to `self.colname_cited`)
        :return_df: return a dataframe rather than a series (default False)
        :returns: the two-column DataFrame (matched column first) if
        `return_df` is True; otherwise a Series of the paper IDs on the other
        end of each citation

        """
        if table is None:
            table = self.tblname_links
        if col_citing is None:
            col_citing = self.colname_citing
        if col_cited is None:
            col_cited = self.colname_cited
        paperids = parse_id(paperids)
        tbl = self._get_table(table)
        col_citing = self._get_col(col_citing, tbl)
        col_cited = self._get_col(col_cited, tbl)
        # cols[0] is the column matched against `paperids`; cols[1] is the
        # column holding the papers at the other end of the citation
        cols = (col_citing, col_cited)
        if direction.lower() == 'in':
            cols = (col_cited, col_citing)
        sq = tbl.select(cols[0].in_(paperids))
        sq = sq.with_only_columns(cols)
        result = self.read_sql(sq)
        if return_df is True:
            return result
        # `.ix` was deprecated in pandas 0.20 and removed in 1.0; the second
        # column is selected by position, so `.iloc` is the replacement
        return result.iloc[:, 1]
Ejemplo n.º 19
0
    def query_field_of_study_name(self,
                                  fosid,
                                  squeeze=True,
                                  table=None,
                                  col_fosid=None,
                                  col_fosname=None):
        """Look up the name(s) for one or more Field of Study (FOS) IDs
        A single fosid yields a string (when `squeeze` is True); otherwise a dictionary is returned

        :fosid: Field of Study ID or list
        :squeeze: if there is only one FOS ID, return just the name instead of a dictionary
        :table: table object or name of table with FOS IDs and names
        (defaults to `self.tblname_fields`)
        :col_fosid: column (or name of column) in the table for FOS
        (defaults to `self.colname_fosid`)
        :col_fosname: column (or name of column) in the table for FOS name
        (defaults to `self.colname_fosname`)
        :returns: FOS name as string, or dictionary with keys FOS ID and values FOS name.
        None when a single ID is squeezed and the query does not return exactly one row

        """
        table_ref = self.tblname_fields if table is None else table
        id_ref = self.colname_fosid if col_fosid is None else col_fosid
        name_ref = self.colname_fosname if col_fosname is None else col_fosname

        fields_table = self._get_table(table_ref)
        id_column = self._get_col(id_ref, fields_table)
        name_column = self._get_col(name_ref, fields_table)

        wanted_ids = parse_id(fosid)

        query = fields_table.select(id_column.in_(wanted_ids))
        query = query.with_only_columns([id_column, name_column])
        records = self.engine.execute(query).fetchall()

        single_squeezed = len(wanted_ids) == 1 and squeeze is True
        if not single_squeezed:
            return {rec[id_column]: rec[name_column] for rec in records}
        if len(records) == 1:
            return records[0][name_column]
        # single ID requested but zero or several rows came back
        return None
Ejemplo n.º 20
0
    def query_for_citations(self, paperids,
            direction='out',
            table=None,
            col_citing=None,
            col_cited=None,
            return_df=False):
        """Take a list of paper ids and return the citations as either a Series or DataFrame

        :paperids: paperid or list of paperids
        :direction: 'out' (default) for out-citations. 'in' (case-insensitive)
        for incoming citations
        :table: table object or name of table (defaults to `self.tblname_links`)
        :col_citing: column (or name of column) for citing papers
        (defaults to `self.colname_citing`)
        :col_cited: column (or name of column) for cited papers
        (defaults to `self.colname_cited`)
        :return_df: return a dataframe rather than a series (default False)
        :returns: the two-column DataFrame (matched column first) if
        `return_df` is True; otherwise a Series of the paper IDs on the other
        end of each citation

        """
        if table is None:
            table = self.tblname_links
        if col_citing is None:
            col_citing = self.colname_citing
        if col_cited is None:
            col_cited = self.colname_cited
        paperids = parse_id(paperids)
        tbl = self._get_table(table)
        col_citing = self._get_col(col_citing, tbl)
        col_cited = self._get_col(col_cited, tbl)
        # cols[0] is the column matched against `paperids`; cols[1] is the
        # column holding the papers at the other end of the citation
        cols = (col_citing, col_cited)
        if direction.lower() == 'in':
            cols = (col_cited, col_citing)
        sq = tbl.select(cols[0].in_(paperids))
        sq = sq.with_only_columns(cols)
        result = self.read_sql(sq)
        if return_df is True:
            return result
        # `.ix` was deprecated in pandas 0.20 and removed in 1.0; the second
        # column is selected by position, so `.iloc` is the replacement
        return result.iloc[:, 1]
Ejemplo n.º 21
0
    def query_field_of_study_name(self, fosid,
                        squeeze=True,
                        table=None,
                        col_fosid=None,
                        col_fosname=None):
        """Look up the name(s) for one or more Field of Study (FOS) IDs
        A single fosid yields a string (when `squeeze` is True); otherwise a dictionary is returned

        :fosid: Field of Study ID or list
        :squeeze: if there is only one FOS ID, return just the name instead of a dictionary
        :table: table object or name of table with FOS IDs and names
        (defaults to `self.tblname_fields`)
        :col_fosid: column (or name of column) in the table for FOS
        (defaults to `self.colname_fosid`)
        :col_fosname: column (or name of column) in the table for FOS name
        (defaults to `self.colname_fosname`)
        :returns: FOS name as string, or dictionary with keys FOS ID and values FOS name.
        None when a single ID is squeezed and the query does not return exactly one row

        """
        table_ref = self.tblname_fields if table is None else table
        id_ref = self.colname_fosid if col_fosid is None else col_fosid
        name_ref = self.colname_fosname if col_fosname is None else col_fosname

        fields_table = self._get_table(table_ref)
        id_column = self._get_col(id_ref, fields_table)
        name_column = self._get_col(name_ref, fields_table)

        wanted_ids = parse_id(fosid)

        query = fields_table.select(id_column.in_(wanted_ids))
        query = query.with_only_columns([id_column, name_column])
        records = self.engine.execute(query).fetchall()

        single_squeezed = len(wanted_ids) == 1 and squeeze is True
        if not single_squeezed:
            return {rec[id_column]: rec[name_column] for rec in records}
        if len(records) == 1:
            return records[0][name_column]
        # single ID requested but zero or several rows came back
        return None
Ejemplo n.º 22
0
    def get_paperids_from_authorid(self,
                                   authorids,
                                   table=None,
                                   col_authorid=None,
                                   col_paperid=None,
                                   return_df=False):
        """Return the paper IDs associated with an author ID or list

        :authorids: author ID or list
        :table: table object or name of table mapping Author IDs to Paper IDs
        (defaults to `self.tblname_paper_authors`)
        :col_authorid: column (or name of column) in the table for Author ID
        (defaults to `self.colname_authorid`)
        :col_paperid: column (or name of column) in the table for Paper ID
        (defaults to `self.colname_paperid`)
        :return_df: return a dataframe rather than a series (default False)
        :returns: the full DataFrame of matching rows if `return_df` is truthy;
        otherwise a Series of paper IDs (an empty Series if nothing matched)

        """
        if table is None:
            table = self.tblname_paper_authors
        if col_authorid is None:
            col_authorid = self.colname_authorid
        if col_paperid is None:
            col_paperid = self.colname_paperid

        tbl = self._get_table(table)
        col_authorid = self._get_col(col_authorid, tbl)
        col_paperid = self._get_col(col_paperid, tbl)

        authorids = parse_id(authorids)

        sq = tbl.select(col_authorid.in_(authorids))
        result = self.read_sql(sq)
        if return_df:
            return result
        if result.empty:
            return pd.Series()
        # `.ix` was deprecated in pandas 0.20 and removed in 1.0. The lookup
        # here is by column label, so `.loc` is the direct replacement.
        return result.loc[:, col_paperid.name]